# Import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
# Load the dataset from Colab
# files.upload() is Colab's upload helper; per the cell output below, the
# selected file is saved into the working directory as 'Health Dataset4.csv'.
# NOTE(review): `uploaded` is not referenced again in the visible cells - the
# CSV is read back from disk by pd.read_csv, so only the saved file matters.
from google.colab import files
uploaded = files.upload()
Saving Health Dataset4.csv to Health Dataset4.csv
# Read the uploaded CSV into the working DataFrame used by every later cell
df = pd.read_csv('Health Dataset4.csv')

# General info (columns, non-null counts, dtypes).
# DataFrame.info() writes its own report, so only the heading is printed here.
print("Summary for general info:")
df.info()

# Descriptive statistics for the numeric columns: heading and table are
# emitted by a single print call, one argument per line.
print(
    "\nSummary for descriptive statistics for numeric columns:",
    df.describe(),
    sep="\n",
)
Summary for general info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21022 entries, 0 to 21021
Data columns (total 18 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Country 21022 non-null object
1 Year 21022 non-null int64
2 Cost of a healthy diet 20740 non-null float64
3 Income 20761 non-null float64
4 Inflation 20967 non-null float64
5 Child mortality rate 21022 non-null float64
6 Unemployment Rate 20981 non-null float64
7 Life expectancy 21022 non-null float64
8 Incomplete tertiary education 21022 non-null float64
9 Gini coefficient 20761 non-null float64
10 Diabetes 20992 non-null float64
11 BMI (female) 20997 non-null float64
12 Cardiovascular diseases 20972 non-null float64
13 BMI (male) 20997 non-null float64
14 Sex ratio 21022 non-null float64
15 GDP 20972 non-null float64
16 Median age 21022 non-null float64
17 CPI 20968 non-null float64
dtypes: float64(16), int64(1), object(1)
memory usage: 2.9+ MB
Summary for descriptive statistics for numeric columns:
Year Cost of a healthy diet Income Inflation \
count 21022.000000 20740.000000 20761.000000 20967.000000
mean 1987.530539 3.702784 19.459471 15.732124
std 21.212387 0.756452 20.835417 209.629220
min 1950.000000 1.607861 0.996974 -17.640425
25% 1969.000000 3.240657 4.975486 3.177098
50% 1988.000000 3.601758 10.202586 5.649143
75% 2006.000000 4.157299 25.974817 9.361873
max 2023.000000 6.259097 93.327800 23773.130000
Child mortality rate Unemployment Rate Life expectancy \
count 21022.000000 20981.000000 21022.000000
mean 7.860013 7.725988 64.324058
std 8.693326 5.808253 11.800772
min 0.140100 0.100000 10.989100
25% 1.628700 3.556000 57.101925
50% 4.069800 5.964000 66.961250
75% 11.242525 10.997000 73.028875
max 68.864204 38.800000 86.372400
Incomplete tertiary education Gini coefficient Diabetes \
count 21022.000000 20761.000000 20992.000000
mean 19.931854 0.374294 9.107350
std 18.141555 0.087747 4.769398
min 0.000000 0.177440 1.300000
25% 4.500000 0.309804 6.300000
50% 14.300000 0.355605 7.700000
75% 32.900000 0.421850 10.600000
max 78.600000 0.710506 29.800000
BMI (female) Cardiovascular diseases BMI (male) Sex ratio \
count 20997.000000 20972.000000 20997.000000 21022.000000
mean 25.836118 35.174033 25.037326 104.441173
std 3.169092 158.574426 2.908179 3.387273
min 16.399592 0.000928 17.634594 71.428570
25% 23.765833 0.417453 22.682456 102.867756
50% 26.009947 1.582295 25.304912 104.411153
75% 27.432909 6.697575 26.979902 105.650856
max 35.224032 1921.131800 33.556548 200.000000
GDP Median age CPI
count 2.097200e+04 21022.000000 2.096800e+04
mean 3.066844e+12 24.586141 3.186521e+02
std 1.508198e+13 8.357474 2.124303e+03
min 2.625572e+07 12.617000 3.550000e-14
25% 1.717264e+10 17.513000 7.922860e+01
50% 9.156839e+10 22.134000 1.292851e+02
75% 6.100000e+11 30.526500 1.767836e+02
max 1.670000e+14 62.417000 3.879656e+04
# Count rows that repeat an earlier row (the first occurrence is not counted)
repeat_mask = df.duplicated()
print(f"Number of duplicate rows: {repeat_mask.sum()}")

# Show every member of each duplicate group; keep=False also marks the
# first occurrence so the full group can be inspected
all_copies_mask = df.duplicated(keep=False)
print(df[all_copies_mask])

# Remove rows in which every column is NaN (completely blank rows)
df.dropna(how='all', inplace=True)
Number of duplicate rows: 0 Empty DataFrame Columns: [Country, Year, Cost of a healthy diet, Income, Inflation, Child mortality rate, Unemployment Rate, Life expectancy, Incomplete tertiary education, Gini coefficient, Diabetes, BMI (female), Cardiovascular diseases, BMI (male), Sex ratio, GDP, Median age, CPI] Index: []
Based on the above results, the only duplicates are blank rows (rows where every column is NaN or empty), so I will drop those blank rows entirely.
# Clean-up pass: remove fully blank rows first, then exact duplicate rows
# (drop_duplicates keeps the first occurrence of each group by default)
df.dropna(how='all', inplace=True)
df.drop_duplicates(inplace=True)

# Re-count to confirm that no duplicates are left
remaining_duplicates = df.duplicated().sum()
print(f"Duplicates after dropping: {remaining_duplicates}")
Duplicates after dropping: 0
After removing the blank rows, I verified that there are no duplicates in this dataset.
Identify Missing Data¶
# Tally the missing (NaN) entries in each column
missing_per_column = df.isna().sum()

print("\nCount of missing values:")
print(missing_per_column)
Count of missing values: Country 0 Year 0 Cost of a healthy diet 282 Income 261 Inflation 55 Child mortality rate 0 Unemployment Rate 41 Life expectancy 0 Incomplete tertiary education 0 Gini coefficient 261 Diabetes 30 BMI (female) 25 Cardiovascular diseases 50 BMI (male) 25 Sex ratio 0 GDP 50 Median age 0 CPI 54 dtype: int64
QQ Plot of Residuals, Residuals vs. Fitted Values Plot¶
This plot helps check for the assumptions of linearity and constant variance for a linear regression model.
If the residuals show a curved pattern, the relationship between the predictors and the target is not linear, and a linear model may be inappropriate.
A funnel shape (increasing or decreasing spread) means that the variance of the residuals is not constant across all fitted values (heteroscedasticity). This violates one of the key assumptions of linear regression and can lead to inefficient estimates and biased standard errors.
import statsmodels.api as sm
import matplotlib.pyplot as plt
from scipy import stats
# Residual diagnostics for one OLS model per health outcome.
# For each target: fit OLS on the shared predictors, draw a QQ plot and a
# residuals-vs-fitted plot, then print summary statistics and a Shapiro-Wilk
# normality test of the residuals.

# List of predictors (make sure column names match exactly in your dataframe)
features = [
    'Income', 'GDP', 'CPI', 'Sex ratio',
    'BMI (female)', 'Cost of a healthy diet', 'Inflation',
    'Incomplete tertiary education', 'Gini coefficient', 'Median age',
]

# scipy warns that the Shapiro-Wilk p-value may not be accurate for N > 5000,
# so the test is run on a reproducible random subsample of at most this size.
SHAPIRO_MAX_N = 5000

# Loop through each target variable
for target in ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']:
    print(f"\nModeling for: {target}")

    # Subset and drop rows with missing values (complete-case analysis)
    model_data = df[[target] + features].dropna()
    X = model_data[features]
    y = model_data[target]

    # Add constant (intercept)
    X = sm.add_constant(X)

    # Fit OLS regression model
    model = sm.OLS(y, X).fit()
    residuals = model.resid

    # --- QQ Plot ---
    plt.figure(figsize=(6, 4))
    stats.probplot(residuals, dist="norm", plot=plt)
    plt.title(f'QQ Plot of Residuals - {target}')
    plt.grid(True)
    plt.show()

    # --- Residuals vs. Fitted Values Plot ---
    plt.figure(figsize=(6, 4))
    plt.scatter(model.fittedvalues, residuals, alpha=0.5)
    plt.axhline(0, color='red', linestyle='--')
    plt.title(f'Residuals vs Fitted - {target}')
    plt.xlabel('Fitted Values')
    plt.ylabel('Residuals')
    plt.grid(True)
    plt.show()

    # --- Residual Summary ---
    # NOTE: pandas' kurtosis() is *excess* kurtosis (a normal distribution
    # scores 0), so any clearly positive value means heavier-than-normal tails.
    print("Residuals Summary:")
    print(f" Mean: {residuals.mean():.4f}")
    print(f" Std Dev: {residuals.std():.4f}")
    print(f" Skewness: {residuals.skew():.4f}")
    print(f" Kurtosis: {residuals.kurtosis():.4f}")

    # --- Shapiro-Wilk Test for Normality ---
    # Subsample large residual vectors so the reported p-value is reliable;
    # the fixed seed keeps the result reproducible between runs.
    if len(residuals) > SHAPIRO_MAX_N:
        shapiro_input = residuals.sample(n=SHAPIRO_MAX_N, random_state=0)
        print(f" (Shapiro-Wilk computed on a random subsample of {SHAPIRO_MAX_N} residuals)")
    else:
        shapiro_input = residuals
    shapiro_test = stats.shapiro(shapiro_input)
    print(f" Shapiro-Wilk: Statistic={shapiro_test.statistic:.4f}, p-value={shapiro_test.pvalue:.4f}")
    if shapiro_test.pvalue > 0.05:
        print(" Residuals are approximately normal.")
    else:
        print(" Residuals deviate from normality.")
Modeling for: Life expectancy
Residuals Summary: Mean: 0.0001 Std Dev: 8.3820 Skewness: -1.1880 Kurtosis: 2.0946 Shapiro-Wilk: Statistic=0.9266, p-value=0.0000 Residuals deviate from normality. Modeling for: Cardiovascular diseases
/usr/local/lib/python3.11/dist-packages/scipy/stats/_axis_nan_policy.py:586: UserWarning: scipy.stats.shapiro: For N > 5000, computed p-value may not be accurate. Current N is 20740. res = hypotest_fun_out(*samples, **kwds)
Residuals Summary: Mean: 0.0005 Std Dev: 138.6809 Skewness: 4.8048 Kurtosis: 44.4306 Shapiro-Wilk: Statistic=0.4036, p-value=0.0000 Residuals deviate from normality. Modeling for: Diabetes
/usr/local/lib/python3.11/dist-packages/scipy/stats/_axis_nan_policy.py:586: UserWarning: scipy.stats.shapiro: For N > 5000, computed p-value may not be accurate. Current N is 20740. res = hypotest_fun_out(*samples, **kwds)
Residuals Summary: Mean: 0.0001 Std Dev: 3.4359 Skewness: 1.3190 Kurtosis: 4.0660 Shapiro-Wilk: Statistic=0.8846, p-value=0.0000 Residuals deviate from normality.
/usr/local/lib/python3.11/dist-packages/scipy/stats/_axis_nan_policy.py:586: UserWarning: scipy.stats.shapiro: For N > 5000, computed p-value may not be accurate. Current N is 20740. res = hypotest_fun_out(*samples, **kwds)
The results of the QQ plot and Residual vs Fitted value:
Life Expectancy The residuals for the life expectancy model have a near-zero mean, which is good. However, they exhibit moderate left skew (skewness = -1.19) and positive excess kurtosis (2.09; pandas reports excess kurtosis, for which a normal distribution scores 0), i.e. somewhat heavier tails than normal, suggesting they are not normally distributed. The Shapiro-Wilk test confirms this, with a p-value of 0.0000 indicating a significant deviation from normality. The QQ plot likely shows curved tails, and if the residuals vs. fitted plot displays a funnel shape or curve, this would suggest a violation of linearity or constant variance. While linear regression may still be appropriate due to its robustness, a transformation (such as log) could help normalize residuals if strong patterns are observed.
Cardiovascular Diseases This model shows substantial issues with its residuals. The residual mean is near zero (0.0005), as expected for an OLS model with an intercept, but the skewness is very high (4.80), indicating extreme right-skew. The kurtosis value of 44 is also very large, pointing to heavy tails and likely outliers. With a Shapiro-Wilk p-value of 0.0000, the residuals strongly violate the assumption of normality. The QQ plot likely shows large deviations from the diagonal, and the residuals vs. fitted plot probably reveals non-random patterns and uneven spread. A log transformation of the target variable, robust regression methods, or switching to non-linear models like Random Forest may help address these issues.
Diabetes For the diabetes model, the residuals also have a near-zero mean and show moderate right skew (skewness = 1.3) with heavier tails than normal (kurtosis = 4.1). Though not extreme, the Shapiro-Wilk test still reports a p-value of 0.0000, suggesting the residuals are not normally distributed. The QQ plot likely indicates a right-skewed distribution, but the deviation is less severe compared to the cardiovascular model. If the residuals vs. fitted plot does not show any clear patterns or heteroscedasticity, linear regression may still be valid. However, applying log transformation to predictors or the target variable could improve model performance.
Histogram and KDE Plot¶
Histograms and KDE plots are used to visualize the distribution of each variable and how far it departs from normality.
# Histogram and Skewness Summary
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Replace this with your actual DataFrame
# df = pd.read_csv('your_dataset.csv')
# Skewness table plus a histogram / KDE pair for every numeric column.

# Identify numeric columns
numeric_cols = df.select_dtypes(include='number').columns

# Calculate skewness (most right-skewed column first)
skewness_summary = df[numeric_cols].skew().sort_values(ascending=False)
print("Skewness Summary:")
print(skewness_summary)

# Plot histogram and KDE for each numeric column
for col in numeric_cols:
    # Drop NaNs once and reuse the cleaned values for both panels
    values = df[col].dropna()

    plt.figure(figsize=(10, 4))

    # Left panel: histogram
    plt.subplot(1, 2, 1)
    sns.histplot(values, bins=30, kde=False)
    plt.title(f'Histogram of {col}')

    # Right panel: KDE. `fill=True` replaces the deprecated `shade=True`,
    # which becomes an error in seaborn v0.14.0.
    plt.subplot(1, 2, 2)
    sns.kdeplot(values, fill=True)
    plt.title(f'KDE Plot of {col}')

    plt.tight_layout()
    plt.show()
Skewness Summary: Inflation 82.313318 CPI 15.730427 GDP 8.537408 Cardiovascular diseases 8.116527 Sex ratio 7.761461 Diabetes 1.896652 Income 1.595400 Child mortality rate 1.560038 Unemployment Rate 1.499895 Gini coefficient 0.955497 Incomplete tertiary education 0.954011 Median age 0.787980 Cost of a healthy diet 0.572681 BMI (female) 0.393094 BMI (male) 0.168280 Year -0.072027 Life expectancy -0.730467 dtype: float64
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
/tmp/ipython-input-11-1776002204.py:27: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. sns.kdeplot(df[col].dropna(), shade=True)
Outliers Detection¶
The Interquartile Range (IQR) method is used for detecting outliers in this dataset, for the following reasons:
The IQR method applies to continuous numerical data, and most variables in this dataset are continuous numerical variables, such as Inflation, GDP, and CPI.
Additionally, the IQR method is robust to skewed data, and some of the variables are highly skewed, including Inflation, GDP, and CPI. This makes it more suitable than methods like the z-score, which assume normality.
Since the dataset has very few missing values (< 1.5%), the IQR method can be applied effectively without the need for complex imputation prior to outlier detection. Missing data will not significantly bias the quartile estimates.
The IQR method makes no assumption that the data are normally distributed; since most of the variables are skewed, the IQR method is appropriate for this dataset.
# Check Outliers
# Count, per numeric column, the values outside the 1.5 * IQR fences.
for col in df.select_dtypes(include='number').columns:
    # Guard clause: skip anything pandas does not treat as numeric
    if not pd.api.types.is_numeric_dtype(df[col]):
        print(f"Column '{col}' is not numeric, skipping outlier calculation.")
        continue

    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1

    # A value is an outlier when it lies below the lower or above the upper fence
    below_lower_fence = df[col] < Q1 - 1.5 * IQR
    above_upper_fence = df[col] > Q3 + 1.5 * IQR
    outliers = df[below_lower_fence | above_upper_fence]
    print(f"{col}: {len(outliers)} outliers")
Year: 0 outliers Cost of a healthy diet: 470 outliers Income: 1921 outliers Inflation: 2210 outliers Child mortality rate: 1274 outliers Unemployment Rate: 563 outliers Life expectancy: 167 outliers Incomplete tertiary education: 174 outliers Gini coefficient: 683 outliers Diabetes: 1546 outliers BMI (female): 835 outliers Cardiovascular diseases: 3171 outliers BMI (male): 10 outliers Sex ratio: 893 outliers GDP: 3004 outliers Median age: 42 outliers CPI: 1960 outliers
Boxplot¶
Boxplots are a good tool for giving a visual summary of the distribution, skewness, and variability of each numeric variable in the dataset.
# Boxplot
import seaborn as sns
import matplotlib.pyplot as plt
# One boxplot per numeric column
for col in df.select_dtypes(include='number').columns:
    # NaNs are removed up front so they cannot interfere with plotting
    column_data = df[col].dropna()

    # Nothing to draw when the column has no non-null values
    if column_data.empty:
        print(f"Not enough data to generate boxplot for column: {col}")
        continue

    sns.boxplot(x=column_data)
    plt.title(f"Boxplot of {col}")
    plt.xlabel(col)
    plt.show()
Impute missing values with Mean / Median / Mode Imputation for Training Set only¶
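According to the Skewness Summary, variables with skewness close to zero are imputed with the mean, while variables with skewness > 0.5 or < -0.5 are imputed with the median.
The imputation statistics are computed from the training set only and then applied to both the training and test sets, to avoid data leakage.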
# Imputation and Train-Test Split
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Per-country, time-ordered 80/20 train/test split with imputation.
# Fill values are computed from each country's TRAINING rows only and then
# applied to both its train and test rows, so no test information leaks in.

# Columns to impute
mean_impute_cols = ['BMI (female)', 'BMI (male)']
median_impute_cols = [
    'CPI', 'Gini coefficient', 'Income', 'Inflation', 'Unemployment Rate', 'Diabetes',
    'Cardiovascular diseases', 'GDP', 'Cost of a healthy diet',
    'Incomplete tertiary education', 'Child mortality rate',
    'Life expectancy', 'Sex ratio', 'Median age',
]

# Containers for all-country data
train_dfs = []
test_dfs = []

# --- Per-country processing ---
for country in df['Country'].unique():
    df_country = df[df['Country'] == country].sort_values('Year').reset_index(drop=True)

    # Skip countries with very few rows
    if len(df_country) < 5:
        continue

    # Time-based train/test split (80% train): earliest years train, latest test
    split_index = int(len(df_country) * 0.8)
    train_country = df_country.iloc[:split_index].copy()
    test_country = df_country.iloc[split_index:].copy()

    # --- Mean imputation ---
    for col in mean_impute_cols:
        if col in train_country.columns:
            mean_val = train_country[col].mean()
            if np.isnan(mean_val):
                mean_val = 0  # Fallback if all values are missing
            # Assign the result back instead of the chained
            # `frame[col].fillna(..., inplace=True)`, which operates on a
            # temporary copy and stops working in pandas 3.0.
            train_country[col] = train_country[col].fillna(mean_val)
            test_country[col] = test_country[col].fillna(mean_val)

    # --- Median imputation with fallback to (median - 1) or -1 ---
    # NOTE(review): the fill value is (training median - 1), not the median,
    # and -1 when the country has no training data for the column. Both are
    # kept to preserve downstream results, but they bias imputed values
    # and put impossible negatives into non-negative variables - confirm intent.
    for col in median_impute_cols:
        if col in train_country.columns:
            median_val = train_country[col].median()
            if np.isnan(median_val):
                fill_val = -1
            else:
                fill_val = median_val - 1
            train_country[col] = train_country[col].fillna(fill_val)
            test_country[col] = test_country[col].fillna(fill_val)

    # Add Country column explicitly before appending
    train_country['Country'] = country
    test_country['Country'] = country

    # Store per-country processed data
    train_dfs.append(train_country)
    test_dfs.append(test_country)

# Combine all countries into unified train/test sets
train_all = pd.concat(train_dfs, ignore_index=True)
test_all = pd.concat(test_dfs, ignore_index=True)

# Index both sets by (Country, Year)
train_all = train_all.set_index(['Country', 'Year'])
test_all = test_all.set_index(['Country', 'Year'])
print(train_all.head())  # should now show Country and Year as index
print(train_all.index.names)  # ['Country', 'Year']

# Final check
print(" Missing values after imputation (Train):")
print(train_all.isnull().sum())
print("\n Missing values after imputation (Test):")
print(test_all.isnull().sum())
/tmp/ipython-input-14-3768071567.py:39: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
train_country[col].fillna(mean_val, inplace=True)
/tmp/ipython-input-14-3768071567.py:40: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
test_country[col].fillna(mean_val, inplace=True)
/tmp/ipython-input-14-3768071567.py:50: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
train_country[col].fillna(fill_val, inplace=True)
/tmp/ipython-input-14-3768071567.py:51: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
test_country[col].fillna(fill_val, inplace=True)
/tmp/ipython-input-14-3768071567.py:39: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
train_country[col].fillna(mean_val, inplace=True)
/tmp/ipython-input-14-3768071567.py:40: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
test_country[col].fillna(mean_val, inplace=True)
/tmp/ipython-input-14-3768071567.py:50: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
train_country[col].fillna(fill_val, inplace=True)
/tmp/ipython-input-14-3768071567.py:51: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
test_country[col].fillna(fill_val, inplace=True)
Cost of a healthy diet Income Inflation \
Country Year
Afghanistan 1950 -1.0 -1.0 9.68342
1951 -1.0 -1.0 9.68342
1952 -1.0 -1.0 9.68342
1953 -1.0 -1.0 9.68342
1954 -1.0 -1.0 9.68342
Child mortality rate Unemployment Rate Life expectancy \
Country Year
Afghanistan 1950 41.370100 6.9405 28.1563
1951 40.799400 6.9405 28.5836
1952 40.224000 6.9405 29.0138
1953 39.642300 6.9405 29.4521
1954 39.158897 6.9405 29.6975
Incomplete tertiary education Gini coefficient Diabetes \
Country Year
Afghanistan 1950 0.3 -1.0 6.2
1951 0.3 -1.0 6.2
1952 0.3 -1.0 6.2
1953 0.3 -1.0 6.2
1954 0.3 -1.0 6.2
BMI (female) Cardiovascular diseases BMI (male) \
Country Year
Afghanistan 1950 21.054667 3.97278 20.593152
1951 21.054667 3.97278 20.593152
1952 21.054667 3.97278 20.593152
1953 21.054667 3.97278 20.593152
1954 21.054667 3.97278 20.593152
Sex ratio GDP Median age CPI
Country Year
Afghanistan 1950 99.845600 4.186536e+10 18.395 75.438705
1951 101.637560 4.186536e+10 18.370 75.438705
1952 101.717354 4.186536e+10 18.333 75.438705
1953 101.792820 4.186536e+10 18.289 75.438705
1954 101.880760 4.186536e+10 18.239 75.438705
['Country', 'Year']
Missing values after imputation (Train):
Cost of a healthy diet 0
Income 0
Inflation 0
Child mortality rate 0
Unemployment Rate 0
Life expectancy 0
Incomplete tertiary education 0
Gini coefficient 0
Diabetes 0
BMI (female) 0
Cardiovascular diseases 0
BMI (male) 0
Sex ratio 0
GDP 0
Median age 0
CPI 0
dtype: int64
Missing values after imputation (Test):
Cost of a healthy diet 0
Income 0
Inflation 0
Child mortality rate 0
Unemployment Rate 0
Life expectancy 0
Incomplete tertiary education 0
Gini coefficient 0
Diabetes 0
BMI (female) 0
Cardiovascular diseases 0
BMI (male) 0
Sex ratio 0
GDP 0
Median age 0
CPI 0
dtype: int64
The above results verify that all missing values have been imputed.
Spearman Correlation¶
# Spearman Correlation matrix and heatmap
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np # Import numpy for np.number
# Rank-based (Spearman) correlation between all numeric columns
corr_method = 'spearman'

# Non-numeric columns (e.g. Country) are left out of the correlation
df_numeric = df.select_dtypes(include=np.number)
corr_matrix = df_numeric.corr(method=corr_method)

# Annotated heatmap of the correlation matrix
heatmap_title = f'{corr_method.capitalize()} Correlation Heatmap'
plt.figure(figsize=(12, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    square=True,
)
plt.title(heatmap_title)
plt.show()
Multicollinearity Check using VIF¶
Variance Inflation Factor (VIF) measures how much the variance of a regression coefficient is inflated due to multicollinearity among predictor variables. Multicollinearity occurs when predictors are highly correlated with each other, which can lead to unstable coefficient estimates, inflated standard errors, and difficulty in interpreting the individual effects of variables.
VIF values above 5 or 10 typically indicate problematic multicollinearity. Using VIF helps identify redundant features, guides feature selection, and improves model interpretability by ensuring stable and meaningful coefficient estimates.
This dataset includes continuous numeric variables such as GDP and Income and is analyzed with linear regression models of health outcomes like life expectancy and cardiovascular diseases, so a VIF check is appropriate.
This approach is supported by foundational econometrics and statistical learning literature, including works by Gujarati (2003) and James et al. (2013), as well as applied health research where socioeconomic and health indicators often exhibit correlations.
Overall, incorporating VIF checks enhances the reliability of your regression models, especially when interpreting the impact of predictors.
# VIF Test for checking multicollinarity
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
import pandas as pd
# Predictors to screen for multicollinearity (target variables excluded)
vif_features = [
    'Income', 'GDP', 'CPI', 'Sex ratio', 'BMI (female)', 'Cost of a healthy diet',
    'Inflation', 'Incomplete tertiary education', 'Gini coefficient', 'Median age',
    'BMI (male)', 'Unemployment Rate', 'Child mortality rate',
]

# Complete cases only, taken from the training set
vif_data = train_all[vif_features].dropna()

# Prepend the intercept column so each VIF comes from a regression with a constant
vif_data_const = add_constant(vif_data)

# One VIF per design-matrix column (the constant included)
design_matrix = vif_data_const.values
column_count = vif_data_const.shape[1]
vif_df = pd.DataFrame({
    "Feature": vif_data_const.columns,
    "VIF": [variance_inflation_factor(design_matrix, i) for i in range(column_count)],
})

# Display VIF values
print("\nVariance Inflation Factors:")
print(vif_df)
Variance Inflation Factors:
Feature VIF
0 const 0.000000
1 Income 1.331612
2 GDP 1.019552
3 CPI 1.024520
4 Sex ratio 1.135989
5 BMI (female) 7.013274
6 Cost of a healthy diet 1.438665
7 Inflation 1.002126
8 Incomplete tertiary education 1.123735
9 Gini coefficient 1.429919
10 Median age 1.627696
11 BMI (male) 7.474505
12 Unemployment Rate 1.077660
13 Child mortality rate 1.628367
The VIF results show that BMI (female) and BMI (male) are highly collinear, with VIFs of 7.01 and 7.47 respectively. According to Kutner (2005) and O'Brien (2007), a VIF greater than 5 can be considered to indicate high multicollinearity. To solve this problem, BMI (female) and BMI (male) will be combined into a single variable.
# Replace the two collinear BMI columns with their average (BMI_avg) in the
# full dataset and in the train and test sets alike.
bmi_columns = ['BMI (female)', 'BMI (male)']
for frame in (df, train_all, test_all):
    # Plain arithmetic mean: the result is NaN if either input is NaN
    frame['BMI_avg'] = (frame['BMI (female)'] + frame['BMI (male)']) / 2
    # Drop the original columns in place
    frame.drop(bmi_columns, axis=1, inplace=True)

# Check the first few rows of each split
for label, frame in (("Train Set", train_all), ("Test Set", test_all)):
    print(label)
    print(frame.head())
Train Set
Cost of a healthy diet Income Inflation \
Country Year
Afghanistan 1950 -1.0 -1.0 9.68342
1951 -1.0 -1.0 9.68342
1952 -1.0 -1.0 9.68342
1953 -1.0 -1.0 9.68342
1954 -1.0 -1.0 9.68342
Child mortality rate Unemployment Rate Life expectancy \
Country Year
Afghanistan 1950 41.370100 6.9405 28.1563
1951 40.799400 6.9405 28.5836
1952 40.224000 6.9405 29.0138
1953 39.642300 6.9405 29.4521
1954 39.158897 6.9405 29.6975
Incomplete tertiary education Gini coefficient Diabetes \
Country Year
Afghanistan 1950 0.3 -1.0 6.2
1951 0.3 -1.0 6.2
1952 0.3 -1.0 6.2
1953 0.3 -1.0 6.2
1954 0.3 -1.0 6.2
Cardiovascular diseases Sex ratio GDP \
Country Year
Afghanistan 1950 3.97278 99.845600 4.186536e+10
1951 3.97278 101.637560 4.186536e+10
1952 3.97278 101.717354 4.186536e+10
1953 3.97278 101.792820 4.186536e+10
1954 3.97278 101.880760 4.186536e+10
Median age CPI BMI_avg
Country Year
Afghanistan 1950 18.395 75.438705 20.823909
1951 18.370 75.438705 20.823909
1952 18.333 75.438705 20.823909
1953 18.289 75.438705 20.823909
1954 18.239 75.438705 20.823909
Test Set
Cost of a healthy diet Income Inflation \
Country Year
Afghanistan 2009 -1.0 -1.0 -6.811161
2010 -1.0 -1.0 2.178538
2011 -1.0 -1.0 11.804186
2012 -1.0 -1.0 6.441213
2013 -1.0 -1.0 7.385772
Child mortality rate Unemployment Rate Life expectancy \
Country Year
Afghanistan 2009 9.361400 7.914 60.2478
2010 9.023900 7.914 60.7018
2011 8.631701 7.916 61.2503
2012 8.290600 7.909 61.7349
2013 7.978200 7.919 62.1878
Incomplete tertiary education Gini coefficient Diabetes \
Country Year
Afghanistan 2009 8.5 -1.0 10.5
2010 8.8 -1.0 10.8
2011 8.8 -1.0 11.1
2012 8.8 -1.0 11.3
2013 8.8 -1.0 11.6
Cardiovascular diseases Sex ratio GDP \
Country Year
Afghanistan 2009 5.004783 105.540780 7.045116e+10
2010 5.041143 105.446550 8.056966e+10
2011 5.226536 105.328636 8.091317e+10
2012 5.342172 105.202095 9.123145e+10
2013 5.491725 105.091530 9.634110e+10
Median age CPI BMI_avg
Country Year
Afghanistan 2009 14.448 97.867910 22.721053
2010 14.608 100.000000 22.824260
2011 14.776 111.804184 22.928318
2012 14.947 119.005730 23.033418
2013 15.124 127.795220 23.139556
# Repeat the multicollinearity (VIF) check now that BMI_avg has replaced the
# two separate BMI columns. Target variables are deliberately left out.
vif_features = [
    'Income',
    'GDP',
    'CPI',
    'Sex ratio',
    'BMI_avg',
    'Cost of a healthy diet',
    'Inflation',
    'Incomplete tertiary education',
    'Gini coefficient',
    'Median age',
    'Unemployment Rate',
    'Child mortality rate',
]

# Rows with any missing predictor are discarded before the test.
vif_data = train_all[vif_features].dropna()

# An explicit intercept column is added before computing the factors.
vif_data_const = add_constant(vif_data)

# One VIF per design-matrix column, the constant included.
design = vif_data_const.values
vif_df = pd.DataFrame({
    "Feature": vif_data_const.columns,
    "VIF": [
        variance_inflation_factor(design, position)
        for position in range(design.shape[1])
    ],
})

# Report the factors.
print("\nVariance Inflation Factors:")
print(vif_df)
Variance Inflation Factors:
Feature VIF
0 const 0.000000
1 Income 1.135053
2 GDP 1.018229
3 CPI 1.024395
4 Sex ratio 1.134875
5 BMI_avg 1.084183
6 Cost of a healthy diet 1.438641
7 Inflation 1.002094
8 Incomplete tertiary education 1.110715
9 Gini coefficient 1.424647
10 Median age 1.593766
11 Unemployment Rate 1.054735
12 Child mortality rate 1.620868
According to the VIF test above, all predictor variables have VIF values below 2, with the combined BMI average (BMI_avg) showing a VIF of approximately 1.08. This suggests that the model coefficients are reliable and not inflated by redundant information. Therefore, the predictors can be interpreted with confidence, and no variables need to be excluded due to multicollinearity.
Handling Outliers - Winsorization and Yeo-Johnson Transformation¶
- Winsorization
Winsorization is a statistical technique that caps extreme values at chosen percentiles to minimize the influence of outliers on data analysis. It preserves the overall structure of the dataset, retaining its integrity while reducing distortion.
It involves setting a threshold (e.g., the 5th and 95th percentiles) and replacing any values below the lower threshold with the value at that threshold, and any values above the upper threshold with the value at that threshold.
Several studies support Winsorization. Weichle (2023) investigated how different methods for handling outliers and influential observations affect the calculation of medical costs in a dataset, and successfully applied Winsorization to cap extreme cost values at the 5th and 95th percentiles to reduce the influence of extreme outliers. Balia & Jones (2008) reported that, in colon cancer cost data, Winsorization at 5% (5th–95th percentile) replaced 384 outliers, yielding a more consistent average cost without removing data. Carrascosa (2025) provides a “complete guide” to handling outliers. Hoaglin & Iglewicz (1987) and Rousseeuw & Hubert (1991) are both seminal works that recommend Winsorization in robust statistics.
Lu et al. (2024): Winsorization before RNA-seq analysis considerably reduced false positives, improving model performance, and was recommended at 95%
Pachter (2024) investigate the effective percentage of capping applying Winsorization, 93%, 95% and 987% are being tested. Concluded that use 95% for applying Winsorization is the best.
- Yeo-Johnson Transformation
The Yeo-Johnson transformation is applied after outliers have been handled by Winsorization. This dataset contains multiple continuous numeric variables (such as income, BMI, GDP, and health-related indicators) that show skewed distributions. Skewness causes coefficient bias, poor model fit, and inefficient forecasts, and it negatively impacts many modeling techniques, such as ARIMA, linear regression, and parts of Prophet, by violating their assumption that the features are normally distributed. Handling outliers and skewness is therefore essential for generating reliable and stable predictions.
The Yeo-Johnson transformation is used in this dataset. It is a statistical technique that normalizes data, making it more symmetrical and reducing skewness. Yeo-Johnson was chosen because it is appropriate for continuous numeric variables: this dataset is primarily composed of continuous numerical features such as GDP, BMI, income, and other health indicators, which exactly match the type of data Yeo-Johnson is designed to handle. Additionally, the Yeo-Johnson transformation can handle positive, negative, and zero values, which makes it suitable for this dataset, as it contains negative and zero values, for example in inflation. Furthermore, Yeo-Johnson improves normality and reduces skewness, which is appropriate because ARIMA, Prophet, and linear regression assume normally distributed residuals; this enhances model validity and stability.
Compatible with integer or float data: Yeo-Johnson can be applied to both integer and float types (e.g., "Median age"), eliminating the need for manual type conversion.
Not applicable to categorical variables: this dataset includes one categorical feature ("Country"), which is excluded from the transformation because Yeo-Johnson is only suitable for numeric features.
Excluding the categorical variable is not a limitation here: the transformation is not meant for categorical data, so it is applied only to the numeric columns.
Several studies have demonstrated the effectiveness of the Yeo-Johnson transformation in addressing these issues. For example, Zhang et al. (2018) applied the Yeo-Johnson transformation to normalize skewed biomarker and health outcome data prior to predictive modeling. Min et al. (2020) used the method to correct skewness in economic variables such as income and expenditure before conducting regression analysis. Similarly, Wang et al. (2019) employed Yeo-Johnson to transform environmental pollutant data, including values that were zero or negative, leading to improved model fit and interpretability. These studies provide strong evidence that Yeo-Johnson is a robust and versatile transformation suitable for datasets like this one.
Yeo-Johnson transformation benefits ARIMA and Prophet by improving normality and variance stability, helping assumptions and model fit.
# Winsorization and Yeo_Johnson
import numpy as np
import pandas as pd
from sklearn.preprocessing import PowerTransformer
# The three health outcomes are prediction targets and are left untransformed.
target_cols = ['Life expectancy', 'Diabetes', 'Cardiovascular diseases']

# Identifiers and targets are both kept out of the transformation.
exclude_cols = ['Country', 'Year'] + target_cols

# Every remaining column of the training frame is winsorized and transformed.
numeric_cols = [col for col in train_all.columns if col not in exclude_cols]
# --- Step 1: Winsorization at 5‑95% ---
def winsorize_df(df, cols, lower_q=0.05, upper_q=0.95):
    """Cap the listed columns of ``df`` at their own lower/upper quantiles.

    The quantiles are computed from ``df`` itself (not from any global frame),
    so the function can be reused on any DataFrame. ``df`` is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are to be winsorized.
    cols : iterable of str
        Names of the columns to cap. Other columns are copied unchanged.
    lower_q, upper_q : float
        Lower and upper quantiles (between 0 and 1) used as caps.

    Returns
    -------
    tuple (pandas.DataFrame, dict)
        A capped copy of ``df`` and a ``{column: (lower, upper)}`` mapping of
        the caps that were applied, so the same caps can be reused on other
        data (for example a test split).

    Raises
    ------
    ValueError
        If the quantiles are outside [0, 1] or ``lower_q`` exceeds ``upper_q``.
    """
    if not 0 <= lower_q <= upper_q <= 1:
        raise ValueError(
            f"quantiles must satisfy 0 <= lower_q <= upper_q <= 1, "
            f"got lower_q={lower_q}, upper_q={upper_q}"
        )

    df_w = df.copy()
    limits = {}
    for col in cols:
        lower = df[col].quantile(lower_q)
        upper = df[col].quantile(upper_q)
        limits[col] = (lower, upper)
        # Values outside [lower, upper] are replaced by the nearest cap.
        df_w[col] = df[col].clip(lower=lower, upper=upper)
    return df_w, limits
# Winsorize the training split and remember the caps that were used.
train_df_w, limits = winsorize_df(train_all, numeric_cols, 0.05, 0.95)

# The test split is capped with the training caps, never with its own.
test_df_w = test_all.copy()
for col in limits:
    low, high = limits[col]
    test_df_w[col] = test_all[col].clip(low, high)

# --- Step 2: Yeo-Johnson transformation ---
# The transformer is fitted on the winsorized training data only; the fitted
# parameters are then reused for the test data.
pt = PowerTransformer(method='yeo-johnson', standardize=False)

train_df_transformed = train_df_w.copy()
train_df_transformed[numeric_cols] = pt.fit_transform(train_df_w[numeric_cols])

test_df_transformed = test_df_w.copy()
test_df_transformed[numeric_cols] = pt.transform(test_df_w[numeric_cols])

# --- Preview: targets first, then the transformed predictors ---
preview_cols = target_cols + [c for c in numeric_cols if c not in target_cols]
for banner, frame in (
    ("=== Train Transformed Sample ===", train_df_transformed),
    ("\n=== Test Transformed Sample ===", test_df_transformed),
):
    print(banner)
    print(frame[preview_cols].head())

# --- Step 3: stack the transformed train and test rows into one frame ---
df_transformed = pd.concat([train_df_transformed, test_df_transformed], axis=0)
print("\n✅ Combined Transformed DataFrame:")
print(df_transformed.head())
=== Train Transformed Sample ===
Life expectancy Diabetes Cardiovascular diseases \
Country Year
Afghanistan 1950 28.1563 6.2 3.97278
1951 28.5836 6.2 3.97278
1952 29.0138 6.2 3.97278
1953 29.4521 6.2 3.97278
1954 29.6975 6.2 3.97278
Cost of a healthy diet Income Inflation \
Country Year
Afghanistan 1950 1.093342 1.044547 2.16928
1951 1.093342 1.044547 2.16928
1952 1.093342 1.044547 2.16928
1953 1.093342 1.044547 2.16928
1954 1.093342 1.044547 2.16928
Child mortality rate Unemployment Rate \
Country Year
Afghanistan 1950 3.074717 2.002608
1951 3.074717 2.002608
1952 3.074717 2.002608
1953 3.074717 2.002608
1954 3.074717 2.002608
Incomplete tertiary education Gini coefficient Sex ratio \
Country Year
Afghanistan 1950 0.349532 0.13767 0.363604
1951 0.349532 0.13767 0.363604
1952 0.349532 0.13767 0.363604
1953 0.349532 0.13767 0.363604
1954 0.349532 0.13767 0.363604
GDP Median age CPI BMI_avg
Country Year
Afghanistan 1950 20.705163 0.877229 8.990907 18.04494
1951 20.705163 0.877178 8.990907 18.04494
1952 20.705163 0.877104 8.990907 18.04494
1953 20.705163 0.877015 8.990907 18.04494
1954 20.705163 0.876914 8.990907 18.04494
=== Test Transformed Sample ===
Life expectancy Diabetes Cardiovascular diseases \
Country Year
Afghanistan 2009 60.2478 10.5 5.004783
2010 60.7018 10.8 5.041143
2011 61.2503 11.1 5.226536
2012 61.7349 11.3 5.342172
2013 62.1878 11.6 5.491725
Cost of a healthy diet Income Inflation \
Country Year
Afghanistan 2009 1.093342 1.044547 0.494503
2010 1.093342 1.044547 1.107454
2011 1.093342 1.044547 2.319722
2012 1.093342 1.044547 1.862595
2013 1.093342 1.044547 1.964859
Child mortality rate Unemployment Rate \
Country Year
Afghanistan 2009 2.186714 2.110392
2010 2.157764 2.110392
2011 2.122798 2.110601
2012 2.091141 2.109870
2013 2.061049 2.110914
Incomplete tertiary education Gini coefficient Sex ratio \
Country Year
Afghanistan 2009 2.931013 0.13767 0.363604
2010 2.982780 0.13767 0.363604
2011 2.982780 0.13767 0.363604
2012 2.982780 0.13767 0.363604
2013 2.982780 0.13767 0.363604
GDP Median age CPI BMI_avg
Country Year
Afghanistan 2009 21.073189 0.869132 9.988677 19.594780
2010 21.167651 0.869132 10.074967 19.678857
2011 21.170643 0.869132 10.531058 19.763604
2012 21.254968 0.869132 10.793178 19.849176
2013 21.293209 0.869281 11.098647 19.935567
✅ Combined Transformed DataFrame:
Cost of a healthy diet Income Inflation \
Country Year
Afghanistan 1950 1.093342 1.044547 2.16928
1951 1.093342 1.044547 2.16928
1952 1.093342 1.044547 2.16928
1953 1.093342 1.044547 2.16928
1954 1.093342 1.044547 2.16928
Child mortality rate Unemployment Rate Life expectancy \
Country Year
Afghanistan 1950 3.074717 2.002608 28.1563
1951 3.074717 2.002608 28.5836
1952 3.074717 2.002608 29.0138
1953 3.074717 2.002608 29.4521
1954 3.074717 2.002608 29.6975
Incomplete tertiary education Gini coefficient Diabetes \
Country Year
Afghanistan 1950 0.349532 0.13767 6.2
1951 0.349532 0.13767 6.2
1952 0.349532 0.13767 6.2
1953 0.349532 0.13767 6.2
1954 0.349532 0.13767 6.2
Cardiovascular diseases Sex ratio GDP Median age \
Country Year
Afghanistan 1950 3.97278 0.363604 20.705163 0.877229
1951 3.97278 0.363604 20.705163 0.877178
1952 3.97278 0.363604 20.705163 0.877104
1953 3.97278 0.363604 20.705163 0.877015
1954 3.97278 0.363604 20.705163 0.876914
CPI BMI_avg
Country Year
Afghanistan 1950 8.990907 18.04494
1951 8.990907 18.04494
1952 8.990907 18.04494
1953 8.990907 18.04494
1954 8.990907 18.04494
# Verify Index
# Print the index level names of the transformed training frame; the later
# lag step groups by the 'Country' index level, so that level must be present.
print(train_df_transformed.index.names)
['Country', 'Year']
Lag Feature¶
Lag features are values from previous time steps used as predictors to forecast current or future values.
Lag features are suitable for RQ3, which forecasts life expectancy, diabetes, and heart disease over time. Lag features help Prophet and regression models capture dependencies across years more effectively. ARIMA and Random Forest will create lags internally.
Lag is important because it helps to identify patterns and relationships between past and present data points. Time series models, such as ARIMA, heavily rely on lag to capture autocorrelations (the correlation between observations at different time lags) in the data.
Key reasons why lag is essential:
Autocorrelation Detection: Lag enables analysts to understand how current data points are related to previous ones. If there is a significant autocorrelation at a particular lag, it suggests that past values can be used to predict future values. Feature Creation: In machine learning models for time series forecasting, lagged variables are often used as features. These features represent the values of the time series at previous time steps, allowing the model to learn patterns over time. Trend Identification: By observing how values change across different lags, trends and seasonality can be identified. For instance, a consistent increase in lagged values may indicate an upward trend.
In ARIMA, the model forecasts a time series based on the linear relationship between an observation and a number of lagged observations.
Several prior studies show that lag features are a crucial technique for time series modeling with methods such as ARIMA, Prophet, and Random Forest.
Debón et al. (2017) used lagged mortality rates to forecast life expectancy in European countries, Wang et al. (2019) used lagged environmental and health variables to predict life expectancy and disease incidence in China, and Chakraborty et al. (2020) used lagged economic indicators to predict diabetes trends in India.
# Lag Feature -
# === STEP 1: Stack the transformed train and test rows into one panel ===
df_transformed = pd.concat([train_df_transformed, test_df_transformed], axis=0)

# If Country/Year are not ordinary columns, move the index into columns first
# so that the set_index call below always works.
if not {'Country', 'Year'}.issubset(df_transformed.columns):
    df_transformed = df_transformed.reset_index()

# (Country, Year) MultiIndex, sorted so rows are chronological per country.
df_transformed = df_transformed.set_index(['Country', 'Year']).sort_index()

# === STEP 2: Targets versus predictors ===
target_cols = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']
# The 'lagged' marker column (added in a later step) is never a predictor.
non_predictors = set(target_cols) | {'lagged'}
predictors = [col for col in df_transformed.columns if col not in non_predictors]
def create_lag_features(df, cols, lags=(1, 2, 3)):
    """Add per-country lagged copies of the given columns.

    For every column in ``cols`` and every lag ``k`` in ``lags`` a new column
    ``"<col>_lag<k>"`` is added, holding the value found ``k`` rows earlier
    within the same country. The first ``k`` rows of each country have no
    earlier value and are left as NaN.

    Rows are shifted in their existing order, so ``df`` must already be sorted
    chronologically within each country for the lags to be time lags.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index has a level named ``'Country'``. Not modified.
    cols : iterable of str
        Columns to lag.
    lags : iterable of int
        Row offsets to generate; defaults to 1, 2 and 3.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the lag columns appended.
    """
    df_lag = df.copy()
    # The grouping is the same for every column and lag, so build it once.
    by_country = df.groupby(level='Country')
    for col in cols:
        for lag in lags:
            df_lag[f'{col}_lag{lag}'] = by_country[col].shift(lag)
    return df_lag
# === STEP 3 (continued): build the lagged copy of the panel ===
df_lagged = create_lag_features(df_transformed, predictors)

# === STEP 4: mark which frame each row came from ===
df_transformed['lagged'] = False
df_lagged['lagged'] = True

# === STEP 5: stack both frames and keep one row per (Country, Year) ===
# keep='last' retains the row from df_lagged whenever a pair occurs twice.
stacked = pd.concat([df_transformed, df_lagged]).reset_index()
stacked = stacked.drop_duplicates(subset=['Country', 'Year'], keep='last')
df_combined = stacked.set_index(['Country', 'Year']).sort_index()


# === STEP 6: fill the gaps that shifting leaves at the start of each country ===
def _fill_within_country(group):
    # Forward fill first, then backward fill, in year order.
    return group.sort_values('Year').ffill().bfill()


imputed = (
    df_combined
    .reset_index()
    .groupby('Country', group_keys=False)
    .apply(_fill_within_country)
)
df_combined = (
    imputed
    .reset_index(drop=True)
    .set_index(['Country', 'Year'])
    .sort_index()
)

# === STEP 7: same data with Country and Year as ordinary columns ===
df_combined_with_country = df_combined.reset_index()

# === STEP 8: preview the lag columns ===
lag_cols = [f'{col}_lag{lag}' for col in predictors for lag in (1, 2, 3)]
print(" Combined Dataset (1950–2023) with Lag Features + Imputed NaNs")
preview_columns = ['Country', 'Year'] + lag_cols
print(df_combined_with_country[preview_columns].head(10))
Combined Dataset (1950–2023) with Lag Features + Imputed NaNs
Country Year Cost of a healthy diet_lag1 \
0 Afghanistan 1950 1.093342
1 Afghanistan 1951 1.093342
2 Afghanistan 1952 1.093342
3 Afghanistan 1953 1.093342
4 Afghanistan 1954 1.093342
5 Afghanistan 1955 1.093342
6 Afghanistan 1956 1.093342
7 Afghanistan 1957 1.093342
8 Afghanistan 1958 1.093342
9 Afghanistan 1959 1.093342
Cost of a healthy diet_lag2 Cost of a healthy diet_lag3 Income_lag1 \
0 1.093342 1.093342 1.044547
1 1.093342 1.093342 1.044547
2 1.093342 1.093342 1.044547
3 1.093342 1.093342 1.044547
4 1.093342 1.093342 1.044547
5 1.093342 1.093342 1.044547
6 1.093342 1.093342 1.044547
7 1.093342 1.093342 1.044547
8 1.093342 1.093342 1.044547
9 1.093342 1.093342 1.044547
Income_lag2 Income_lag3 Inflation_lag1 Inflation_lag2 ... GDP_lag3 \
0 1.044547 1.044547 2.16928 2.16928 ... 20.705163
1 1.044547 1.044547 2.16928 2.16928 ... 20.705163
2 1.044547 1.044547 2.16928 2.16928 ... 20.705163
3 1.044547 1.044547 2.16928 2.16928 ... 20.705163
4 1.044547 1.044547 2.16928 2.16928 ... 20.705163
5 1.044547 1.044547 2.16928 2.16928 ... 20.705163
6 1.044547 1.044547 2.16928 2.16928 ... 20.705163
7 1.044547 1.044547 2.16928 2.16928 ... 20.705163
8 1.044547 1.044547 2.16928 2.16928 ... 20.705163
9 1.044547 1.044547 2.16928 2.16928 ... 20.705163
Median age_lag1 Median age_lag2 Median age_lag3 CPI_lag1 CPI_lag2 \
0 0.877229 0.877229 0.877229 8.990907 8.990907
1 0.877229 0.877229 0.877229 8.990907 8.990907
2 0.877178 0.877229 0.877229 8.990907 8.990907
3 0.877104 0.877178 0.877229 8.990907 8.990907
4 0.877015 0.877104 0.877178 8.990907 8.990907
5 0.876914 0.877015 0.877104 8.990907 8.990907
6 0.876807 0.876914 0.877015 8.990907 8.990907
7 0.876703 0.876807 0.876914 8.990907 8.990907
8 0.876593 0.876703 0.876807 8.990907 8.990907
9 0.876462 0.876593 0.876703 8.990907 8.990907
CPI_lag3 BMI_avg_lag1 BMI_avg_lag2 BMI_avg_lag3
0 8.990907 18.04494 18.04494 18.04494
1 8.990907 18.04494 18.04494 18.04494
2 8.990907 18.04494 18.04494 18.04494
3 8.990907 18.04494 18.04494 18.04494
4 8.990907 18.04494 18.04494 18.04494
5 8.990907 18.04494 18.04494 18.04494
6 8.990907 18.04494 18.04494 18.04494
7 8.990907 18.04494 18.04494 18.04494
8 8.990907 18.04494 18.04494 18.04494
9 8.990907 18.04494 18.04494 18.04494
[10 rows x 38 columns]
Cost of a healthy diet Income Inflation \
Country Year
Afghanistan 1950 1.093342 1.044547 2.16928
1951 1.093342 1.044547 2.16928
1952 1.093342 1.044547 2.16928
1953 1.093342 1.044547 2.16928
1954 1.093342 1.044547 2.16928
Child mortality rate Unemployment Rate Life expectancy \
Country Year
Afghanistan 1950 3.074717 2.002608 28.1563
1951 3.074717 2.002608 28.5836
1952 3.074717 2.002608 29.0138
1953 3.074717 2.002608 29.4521
1954 3.074717 2.002608 29.6975
Incomplete tertiary education Gini coefficient Diabetes \
Country Year
Afghanistan 1950 0.349532 0.13767 6.2
1951 0.349532 0.13767 6.2
1952 0.349532 0.13767 6.2
1953 0.349532 0.13767 6.2
1954 0.349532 0.13767 6.2
Cardiovascular diseases ... GDP_lag3 Median age_lag1 \
Country Year ...
Afghanistan 1950 3.97278 ... 20.705163 0.877229
1951 3.97278 ... 20.705163 0.877229
1952 3.97278 ... 20.705163 0.877178
1953 3.97278 ... 20.705163 0.877104
1954 3.97278 ... 20.705163 0.877015
Median age_lag2 Median age_lag3 CPI_lag1 CPI_lag2 \
Country Year
Afghanistan 1950 0.877229 0.877229 8.990907 8.990907
1951 0.877229 0.877229 8.990907 8.990907
1952 0.877229 0.877229 8.990907 8.990907
1953 0.877178 0.877229 8.990907 8.990907
1954 0.877104 0.877178 8.990907 8.990907
CPI_lag3 BMI_avg_lag1 BMI_avg_lag2 BMI_avg_lag3
Country Year
Afghanistan 1950 8.990907 18.04494 18.04494 18.04494
1951 8.990907 18.04494 18.04494 18.04494
1952 8.990907 18.04494 18.04494 18.04494
1953 8.990907 18.04494 18.04494 18.04494
1954 8.990907 18.04494 18.04494 18.04494
[5 rows x 52 columns]
Cost of a healthy diet Income Inflation \
Country Year
Afghanistan 1950 1.093342 1.044547 2.16928
1951 1.093342 1.044547 2.16928
1952 1.093342 1.044547 2.16928
1953 1.093342 1.044547 2.16928
1954 1.093342 1.044547 2.16928
Child mortality rate Unemployment Rate Life expectancy \
Country Year
Afghanistan 1950 3.074717 2.002608 28.1563
1951 3.074717 2.002608 28.5836
1952 3.074717 2.002608 29.0138
1953 3.074717 2.002608 29.4521
1954 3.074717 2.002608 29.6975
Incomplete tertiary education Gini coefficient Diabetes \
Country Year
Afghanistan 1950 0.349532 0.13767 6.2
1951 0.349532 0.13767 6.2
1952 0.349532 0.13767 6.2
1953 0.349532 0.13767 6.2
1954 0.349532 0.13767 6.2
Cardiovascular diseases ... Median age_lag1 \
Country Year ...
Afghanistan 1950 3.97278 ... NaN
1951 3.97278 ... 0.877229
1952 3.97278 ... 0.877178
1953 3.97278 ... 0.877104
1954 3.97278 ... 0.877015
Median age_lag2 Median age_lag3 CPI_lag1 CPI_lag2 \
Country Year
Afghanistan 1950 NaN NaN NaN NaN
1951 NaN NaN 8.990907 NaN
1952 0.877229 NaN 8.990907 8.990907
1953 0.877178 0.877229 8.990907 8.990907
1954 0.877104 0.877178 8.990907 8.990907
CPI_lag3 BMI_avg_lag1 BMI_avg_lag2 BMI_avg_lag3 lagged
Country Year
Afghanistan 1950 NaN NaN NaN NaN True
1951 NaN 18.04494 NaN NaN True
1952 NaN 18.04494 18.04494 NaN True
1953 8.990907 18.04494 18.04494 18.04494 True
1954 8.990907 18.04494 18.04494 18.04494 True
[5 rows x 52 columns]
Restore Index - Country and Year¶
## Make sure df_transformed is indexed by Country and Year
# set_index is only needed when one of the two levels is missing from the index.
index_levels = df_transformed.index.names
if not ('Country' in index_levels and 'Year' in index_levels):
    df_transformed = df_transformed.set_index(['Country', 'Year'])

# Sorted index keeps each country's rows in year order.
df_transformed = df_transformed.sort_index()

# Show the resulting structure.
print("✅ Index restored — here’s a sample:")
print(df_transformed.head())
✅ Index restored — here’s a sample:
Cost of a healthy diet Income Inflation \
Country Year
Afghanistan 1950 1.093342 1.044547 2.16928
1951 1.093342 1.044547 2.16928
1952 1.093342 1.044547 2.16928
1953 1.093342 1.044547 2.16928
1954 1.093342 1.044547 2.16928
Child mortality rate Unemployment Rate Life expectancy \
Country Year
Afghanistan 1950 3.074717 2.002608 28.1563
1951 3.074717 2.002608 28.5836
1952 3.074717 2.002608 29.0138
1953 3.074717 2.002608 29.4521
1954 3.074717 2.002608 29.6975
Incomplete tertiary education Gini coefficient Diabetes \
Country Year
Afghanistan 1950 0.349532 0.13767 6.2
1951 0.349532 0.13767 6.2
1952 0.349532 0.13767 6.2
1953 0.349532 0.13767 6.2
1954 0.349532 0.13767 6.2
Cardiovascular diseases Sex ratio GDP Median age \
Country Year
Afghanistan 1950 3.97278 0.363604 20.705163 0.877229
1951 3.97278 0.363604 20.705163 0.877178
1952 3.97278 0.363604 20.705163 0.877104
1953 3.97278 0.363604 20.705163 0.877015
1954 3.97278 0.363604 20.705163 0.876914
CPI BMI_avg lagged
Country Year
Afghanistan 1950 8.990907 18.04494 False
1951 8.990907 18.04494 False
1952 8.990907 18.04494 False
1953 8.990907 18.04494 False
1954 8.990907 18.04494 False
Identify the Best Feature Selection Method and The Best Number of Features for Modeling¶
Comparison of four feature selection methods, using RMSE to find the best number of features for modeling:
- LASSO (Least Absolute Shrinkage and Selection Operator)
- RFE (Recursive Feature Elimination)
- Forward Selection
- Random Forest
Each method selects features based on different principles, and for each method, the code evaluates models using a different number of features—starting from 1 up to a maximum (e.g., 15). For each configuration (method + number of features), the model’s performance is assessed using TimeSeriesSplit cross-validation and Root Mean Squared Error (RMSE) as the evaluation metric. The process is repeated for each of the target variables separately. The method and feature count with the lowest RMSE is considered optimal for that target.
Feature selection comparison¶
# feature selection comparison
from sklearn.linear_model import Ridge
def find_best_feature_count(X_df, y, max_features=None, step=2):
    """Compare four feature-selection methods over a range of feature counts.

    For each candidate count ``n`` the function picks ``n`` features with
    LASSO coefficient magnitude, recursive feature elimination (RFE), forward
    sequential selection and random-forest importance, then scores each subset
    with a linear regression under ``TimeSeriesSplit`` cross-validation. The
    score is the RMSE of the pooled out-of-fold predictions, expressed in the
    original units of ``y``.

    Parameters
    ----------
    X_df : pandas.DataFrame
        Candidate predictors, one column per feature, without missing values.
        ``TimeSeriesSplit`` splits by row position, so the rows should be in
        the order the folds are meant to respect.
    y : pandas.Series or array-like
        Target values aligned with the rows of ``X_df``.
    max_features : int, optional
        Largest feature count to try. Defaults to 20 and is always limited to
        one less than the number of columns (forward selection cannot select
        every column).
    step : int, optional
        Spacing between candidate counts; the default of 2 tries 1, 3, 5, ...

    Returns
    -------
    tuple (pandas.DataFrame, dict)
        The RMSE table (columns ``n_features``, ``LASSO_RMSE``, ``RFE_RMSE``,
        ``Forward_RMSE``, ``RF_RMSE``) and a dict keyed by ``'LASSO'``,
        ``'RFE'``, ``'Forward'`` and ``'RandomForest'``; each value holds the
        best ``n_features``, its ``rmse`` and the selected ``features``. A
        method that failed for every count reports ``n_features`` 0, a NaN
        ``rmse`` and an empty feature list.

    Raises
    ------
    ValueError
        If ``step`` is smaller than 1 or ``X_df`` has fewer than two columns.
    """
    import warnings

    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.feature_selection import RFE, SequentialFeatureSelector
    from sklearn.linear_model import LassoCV, LinearRegression
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import TimeSeriesSplit
    from sklearn.preprocessing import StandardScaler

    if step < 1:
        raise ValueError("step must be a positive integer")

    # --- Scale X and y ---
    # NOTE(review): the scalers and the LASSO / random-forest rankings below
    # are fitted on all rows before cross-validation, so the reported RMSE is
    # not a fully out-of-sample estimate.
    feature_names = X_df.columns.tolist()
    X_scaled = StandardScaler().fit_transform(X_df)
    y_original = np.asarray(y).ravel()
    y_scaler = StandardScaler()
    y_scaled = y_scaler.fit_transform(y_original.reshape(-1, 1)).ravel()

    max_features = min(max_features or 20, X_scaled.shape[1] - 1)
    if max_features < 1:
        raise ValueError("X_df needs at least two columns to compare feature subsets")

    tscv = TimeSeriesSplit(n_splits=3)

    def rmse_on_original_scale(model, X_subset):
        """Return the pooled out-of-fold RMSE of ``model`` in original y units."""
        y_preds, y_tests = [], []
        for train_idx, test_idx in tscv.split(X_subset):
            model.fit(X_subset[train_idx], y_scaled[train_idx])
            y_pred_scaled = model.predict(X_subset[test_idx])
            y_pred_original = y_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
            y_preds.extend(y_pred_original)
            y_tests.extend(y_original[test_idx])
        return np.sqrt(mean_squared_error(y_tests, y_preds))

    def report_failure(method, n, exc):
        """Warn about a failed selector and return the NaN placeholder score."""
        warnings.warn(
            f"{method} failed for n_features={n}: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return np.nan

    # --- Feature rankings (ascending, so the last n entries are the top n) ---
    lasso = LassoCV(cv=tscv, random_state=42).fit(X_scaled, y_scaled)
    lasso_order = np.argsort(np.abs(lasso.coef_))
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_scaled, y_scaled)
    rf_order = np.argsort(rf_model.feature_importances_)

    # --- Score every method at every candidate feature count ---
    rows = []
    # Selection masks are kept so the winning subsets need no second fit.
    rfe_masks, sfs_masks = {}, {}
    for n in range(1, max_features + 1, step):
        row = {'n_features': n}

        row['LASSO_RMSE'] = rmse_on_original_scale(
            LinearRegression(), X_scaled[:, lasso_order[-n:]]
        )

        try:
            rfe = RFE(LinearRegression(), n_features_to_select=n).fit(X_scaled, y_scaled)
            row['RFE_RMSE'] = rmse_on_original_scale(
                LinearRegression(), X_scaled[:, rfe.support_]
            )
            rfe_masks[n] = rfe.support_
        except Exception as exc:  # best effort: one failed count must not abort the scan
            row['RFE_RMSE'] = report_failure('RFE', n, exc)

        try:
            sfs = SequentialFeatureSelector(
                LinearRegression(),
                n_features_to_select=n,
                direction='forward',
                cv=tscv,
                n_jobs=-1,
            ).fit(X_scaled, y_scaled)
            sfs_mask = sfs.get_support()
            row['Forward_RMSE'] = rmse_on_original_scale(
                LinearRegression(), X_scaled[:, sfs_mask]
            )
            sfs_masks[n] = sfs_mask
        except Exception as exc:  # best effort, see above
            row['Forward_RMSE'] = report_failure('Forward selection', n, exc)

        row['RF_RMSE'] = rmse_on_original_scale(
            LinearRegression(), X_scaled[:, rf_order[-n:]]
        )
        rows.append(row)

    rmse_columns = ['LASSO_RMSE', 'RFE_RMSE', 'Forward_RMSE', 'RF_RMSE']
    df_combined = pd.DataFrame(rows, columns=['n_features'] + rmse_columns)

    def best_for(column):
        """Return (feature count, RMSE) of the lowest non-NaN score in ``column``."""
        scores = df_combined[column]
        if scores.isna().all():
            return 0, np.nan
        best_row = scores.idxmin()
        return int(df_combined.loc[best_row, 'n_features']), scores.loc[best_row]

    def names_from_order(order, n):
        """Names of the top-``n`` features of an ascending ranking."""
        return [feature_names[i] for i in order[-n:]] if n else []

    def names_from_mask(mask):
        """Names of the features flagged in a boolean selection mask."""
        if mask is None:
            return []
        return [name for name, keep in zip(feature_names, mask) if keep]

    # --- Best configuration per method ---
    best_lasso_n, best_lasso_rmse = best_for('LASSO_RMSE')
    best_rfe_n, best_rfe_rmse = best_for('RFE_RMSE')
    best_sfs_n, best_sfs_rmse = best_for('Forward_RMSE')
    best_rf_n, best_rf_rmse = best_for('RF_RMSE')

    best_methods = {
        'LASSO': {
            'n_features': best_lasso_n,
            'rmse': best_lasso_rmse,
            'features': names_from_order(lasso_order, best_lasso_n),
        },
        'RFE': {
            'n_features': best_rfe_n,
            'rmse': best_rfe_rmse,
            'features': names_from_mask(rfe_masks.get(best_rfe_n)),
        },
        'Forward': {
            'n_features': best_sfs_n,
            'rmse': best_sfs_rmse,
            'features': names_from_mask(sfs_masks.get(best_sfs_n)),
        },
        'RandomForest': {
            'n_features': best_rf_n,
            'rmse': best_rf_rmse,
            'features': names_from_order(rf_order, best_rf_n),
        },
    }
    return df_combined, best_methods
import matplotlib.pyplot as plt
# Run the feature-selection comparison once per target and plot the RMSE
# curves of the four methods.
target_cols = ['Cardiovascular diseases', 'Diabetes', 'Life expectancy']
results = {}
for target in target_cols:
    # Columns that must never be offered as predictors for this target:
    # the other outcomes, lags of the current outcome (if any were created)
    # and the constant 'lagged' marker column.
    lag_cols = [f'{target}_lag1', f'{target}_lag2']
    cols_to_drop = [c for c in target_cols if c != target] + lag_cols + ['lagged']

    # Shifting leaves NaN in the first rows of every country and the linear
    # selectors reject NaN, so incomplete rows are removed. X and y are taken
    # from the same filtered frame to stay aligned.
    model_frame = df_lagged.drop(columns=cols_to_drop, errors='ignore').dropna()
    X = model_frame.drop(columns=[target])
    y = model_frame[target]

    print(f"\n🔍 Feature selection for target: {target}")
    # A dedicated name is used for the RMSE table so the module-level
    # df_combined dataset is not overwritten; the stored key is unchanged.
    rmse_table, best_methods = find_best_feature_count(X, y)
    results[target] = {'df_combined': rmse_table, 'best_methods': best_methods}

    for method, info in best_methods.items():
        print(f"\nMethod: {method}")
        print(f"Best number of features: {info['n_features']}")
        print(f"Best RMSE: {info['rmse']:.4f}")
        print(f"Selected features: {info['features']}")

    # One figure per target: RMSE against the number of selected features.
    plt.figure(figsize=(10, 6))
    for column, label, marker in (
        ('LASSO_RMSE', 'LASSO', 'o'),
        ('RFE_RMSE', 'RFE', 's'),
        ('Forward_RMSE', 'Forward', '^'),
        ('RF_RMSE', 'Random Forest', 'v'),
    ):
        plt.plot(rmse_table['n_features'], rmse_table[column], label=label, marker=marker)
    plt.xlabel('Number of Features')
    plt.ylabel('RMSE')
    plt.title(f'RMSE vs Number of Features for Target: {target}')
    plt.grid(True)
    plt.legend()
    plt.show()
🔍 Feature selection for target: Cardiovascular diseases
/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 2.0850734284358623, tolerance: 1.065338423995386 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 8.305569563730387, tolerance: 1.065338423995386 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 17.32235788048456, tolerance: 1.065338423995386 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 33.52275853433639, tolerance: 1.065338423995386 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 60.11339118560136, tolerance: 1.065338423995386 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 106.76487348795217, tolerance: 1.065338423995386 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 157.9697749504976, tolerance: 1.065338423995386 model = cd_fast.enet_coordinate_descent_gram(
Method: LASSO Best number of features: 1 Best RMSE: 85.6760 Selected features: ['BMI_avg_lag3'] Method: RFE Best number of features: 5 Best RMSE: 83.6979 Selected features: ['Median age', 'Median age_lag1', 'Median age_lag2', 'Median age_lag3', 'BMI_avg_lag3'] Method: Forward Best number of features: 1 Best RMSE: 85.6760 Selected features: ['BMI_avg_lag3'] Method: RandomForest Best number of features: 30 Best RMSE: 108.7010 Selected features: ['Income_lag3', 'BMI_avg_lag1', 'Gini coefficient', 'Child mortality rate', 'Median age_lag1', 'Inflation_lag3', 'Unemployment Rate_lag1', 'Unemployment Rate_lag3', 'Unemployment Rate', 'Inflation', 'Unemployment Rate_lag2', 'BMI_avg_lag2', 'Median age_lag3', 'Incomplete tertiary education_lag2', 'Incomplete tertiary education_lag1', 'Incomplete tertiary education_lag3', 'CPI', 'Incomplete tertiary education', 'GDP_lag1', 'GDP', 'CPI_lag1', 'BMI_avg_lag3', 'CPI_lag3', 'Cost of a healthy diet_lag1', 'GDP_lag2', 'CPI_lag2', 'Cost of a healthy diet', 'Cost of a healthy diet_lag3', 'Cost of a healthy diet_lag2', 'GDP_lag3']
🔍 Feature selection for target: Diabetes Method: LASSO Best number of features: 3 Best RMSE: 3.8882 Selected features: ['Median age_lag3', 'Income', 'BMI_avg_lag3'] Method: RFE Best number of features: 6 Best RMSE: 3.8755 Selected features: ['Income', 'CPI', 'Median age_lag1', 'Median age_lag2', 'Median age_lag3', 'BMI_avg_lag3'] Method: Forward Best number of features: 20 Best RMSE: 3.8593 Selected features: ['Income', 'Child mortality rate', 'Sex ratio', 'CPI', 'BMI_avg', 'Income_lag1', 'Income_lag2', 'Income_lag3', 'Child mortality rate_lag1', 'Child mortality rate_lag2', 'Child mortality rate_lag3', 'Sex ratio_lag1', 'Sex ratio_lag2', 'Sex ratio_lag3', 'CPI_lag1', 'CPI_lag2', 'CPI_lag3', 'BMI_avg_lag1', 'BMI_avg_lag2', 'BMI_avg_lag3'] Method: RandomForest Best number of features: 2 Best RMSE: 3.8922 Selected features: ['Income', 'BMI_avg_lag3']
🔍 Feature selection for target: Life expectancy Method: LASSO Best number of features: 9 Best RMSE: 4.2596 Selected features: ['Median age_lag1', 'CPI_lag3', 'BMI_avg_lag1', 'BMI_avg_lag2', 'CPI_lag2', 'BMI_avg_lag3', 'Income', 'Median age', 'Child mortality rate'] Method: RFE Best number of features: 11 Best RMSE: 4.2227 Selected features: ['Income', 'Child mortality rate', 'Incomplete tertiary education', 'Median age', 'Income_lag3', 'Child mortality rate_lag1', 'Child mortality rate_lag3', 'Incomplete tertiary education_lag3', 'Median age_lag1', 'Median age_lag2', 'Median age_lag3'] Method: Forward Best number of features: 13 Best RMSE: 4.2179 Selected features: ['Income', 'Inflation', 'Child mortality rate', 'Median age', 'Income_lag3', 'Inflation_lag1', 'Inflation_lag2', 'Inflation_lag3', 'Child mortality rate_lag3', 'Incomplete tertiary education_lag3', 'Median age_lag1', 'Median age_lag2', 'Median age_lag3'] Method: RandomForest Best number of features: 4 Best RMSE: 4.2880 Selected features: ['Median age', 'Income', 'Child mortality rate_lag3', 'Child mortality rate']
Feature Selection Comparison with R², MAPE, and RMSE¶
# Feature selection with R sq, MAPE, MSE
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LassoCV, LinearRegression
from sklearn.feature_selection import RFE, SequentialFeatureSelector
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import StandardScaler
# plot_metrics is defined first so it already exists when the evaluation loop at the end of this cell calls it
def plot_metrics(df_combined, target_name):
    """Show one line chart per metric comparing the feature-selection methods.

    For each of RMSE, MAPE and R2 a new figure is opened and, for every
    method, the column ``'<method>_<metric>'`` is plotted against the
    ``'n_features'`` column. A method whose column is absent is reported
    with a printed warning and left out of that figure.

    Args:
        df_combined: DataFrame holding ``'n_features'`` plus the
            ``'<method>_<metric>'`` columns.
        target_name: Label inserted into each figure title.
    """
    method_names = ('LASSO', 'RFE', 'Forward', 'RandomForest')

    for metric in ('RMSE', 'MAPE', 'R2'):
        plt.figure(figsize=(10, 6))

        for method in method_names:
            metric_col = f'{method}_{metric}'
            # Skip (but announce) any method that has no column for this metric.
            if metric_col not in df_combined.columns:
                print(f"Warning: Metric column '{metric_col}' not found in DataFrame for plotting.")
                continue
            plt.plot(
                df_combined['n_features'],
                df_combined[metric_col],
                label=method,
                marker='o',
            )

        plt.title(f'{metric} vs Number of Features ({target_name})')
        plt.xlabel('Number of Features')
        plt.ylabel(metric)
        plt.legend()
        plt.grid(True)
        plt.show()
def evaluate_model(model, X_subset, y_scaled, y_original, y_scaler, tscv):
    """Cross-validate ``model`` and score its predictions on the original y scale.

    For every (train, test) pair yielded by ``tscv.split`` the model is fitted
    on the scaled target, its test predictions are mapped back through
    ``y_scaler.inverse_transform`` and pooled with the matching original
    target values. A fold that raises is reported with ``print`` and
    contributes NaN predictions, which are filtered out before scoring.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X_subset: Feature matrix, either a DataFrame or an array-like that
            is wrapped in a DataFrame so it can be sliced with ``iloc``.
        y_scaled: Scaled target values, indexed positionally.
        y_original: Unscaled target values, indexed positionally.
        y_scaler: Fitted scaler used to undo the target scaling.
        tscv: Splitter exposing ``split(X)``.

    Returns:
        Tuple ``(rmse, mape, r2)`` over all pooled finite pairs, or three
        NaNs when no finite (actual, prediction) pair is available.
    """
    if isinstance(X_subset, pd.DataFrame):
        features = X_subset
    else:
        # Array input: give it a positional index matching y_scaled.
        features = pd.DataFrame(X_subset, index=pd.Series(y_scaled).index)

    predicted, actual = [], []
    for train_rows, test_rows in tscv.split(features):
        fold_train = features.iloc[train_rows]
        fold_test = features.iloc[test_rows]
        fold_target = y_scaled[train_rows]
        fold_actual = y_original[test_rows]

        # Nothing to fit or nothing to score in this fold.
        if len(fold_train) == 0 or len(fold_test) == 0:
            continue

        try:
            model.fit(fold_train, fold_target)
            scaled_pred = model.predict(fold_test)
            fold_pred = y_scaler.inverse_transform(scaled_pred.reshape(-1, 1)).ravel()
            predicted.extend(fold_pred)
            actual.extend(fold_actual)
        except Exception as e:
            print(f"Error during model fitting or prediction in a fold: {e}")
            # Keep both lists the same length; the NaNs are dropped below.
            predicted.extend([np.nan] * len(fold_actual))
            actual.extend(fold_actual)

    actual_arr = np.array(actual)
    predicted_arr = np.array(predicted)

    # Score only the pairs where both the actual and the prediction are finite.
    usable = np.isfinite(actual_arr) & np.isfinite(predicted_arr)
    actual_arr = actual_arr[usable]
    predicted_arr = predicted_arr[usable]

    if len(actual_arr) == 0:
        return np.nan, np.nan, np.nan

    rmse = np.sqrt(mean_squared_error(actual_arr, predicted_arr))
    mape = mean_absolute_percentage_error(actual_arr, predicted_arr)
    r2 = r2_score(actual_arr, predicted_arr)
    return rmse, mape, r2
def find_best_features_with_metrics(X_df, y, max_features=None):
    """Score four feature-ranking methods for every subset size 1..max_features.

    Features and target are standardised, then for each subset size ``n``:

    * LASSO: the ``n`` features with the largest absolute ``LassoCV``
      coefficients,
    * RFE: ``RFE(LinearRegression())`` with ``n_features_to_select=n``,
    * Forward: ``SequentialFeatureSelector`` in forward direction,
    * RandomForest: the ``n`` features with the largest importances,

    are each evaluated with ``evaluate_model`` using a fresh
    ``LinearRegression`` and a 5-split ``TimeSeriesSplit``.

    NOTE: both scalers, the LassoCV fit and the random-forest fit use all
    rows, i.e. they are computed before the cross-validation split.

    Args:
        X_df: DataFrame of candidate features (rows in time order).
        y: pandas Series with the target, aligned row-for-row with ``X_df``.
        max_features: Largest subset size to try. Defaults to 30 and is
            capped at the number of columns in ``X_df``.

    Returns:
        DataFrame with one row per subset size and the columns
        ``n_features`` and ``<method>_RMSE`` / ``<method>_MAPE`` /
        ``<method>_R2`` for each method. An empty DataFrame is returned
        when ``X_df`` has no columns, so callers can always test ``.empty``.
    """
    # Bail out before any scaler or model sees the data: StandardScaler
    # rejects a 0-column matrix, and the caller expects a DataFrame back.
    if X_df.shape[1] == 0:
        print("No features available in X_df. Skipping feature selection.")
        return pd.DataFrame()

    # Scale the features but keep them in a DataFrame so the index survives.
    X_scaled_df = pd.DataFrame(
        StandardScaler().fit_transform(X_df.values),
        columns=X_df.columns,
        index=X_df.index,
    )

    y = y.values.reshape(-1, 1)
    y_original = y.ravel()
    y_scaler = StandardScaler()
    y_scaled = y_scaler.fit_transform(y).ravel()

    tscv = TimeSeriesSplit(n_splits=5)
    max_features = min(max_features or 30, X_scaled_df.shape[1])

    # Global rankings used by the LASSO and RandomForest variants.
    lasso = LassoCV(cv=tscv, random_state=42).fit(X_scaled_df, y_scaled)
    lasso_coef = lasso.coef_
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_scaled_df, y_scaled)
    importances = rf_model.feature_importances_

    def _score(X_subset):
        # Every method is judged with the same model and the same splitter.
        return evaluate_model(LinearRegression(), X_subset, y_scaled, y_original, y_scaler, tscv)

    failed = (np.nan, np.nan, np.nan)
    results = {'LASSO': [], 'RFE': [], 'Forward': [], 'RandomForest': []}

    for n in range(1, max_features + 1):
        # LASSO: top-n features by absolute coefficient.
        idx = np.argsort(np.abs(lasso_coef))[-n:]
        results['LASSO'].append((n, *_score(X_scaled_df.iloc[:, idx])))

        # RFE
        try:
            rfe = RFE(LinearRegression(), n_features_to_select=n)
            scores = _score(rfe.fit_transform(X_scaled_df, y_scaled))
        except Exception as e:
            print(f"RFE failed for n={n}: {e}")
            scores = failed
        results['RFE'].append((n, *scores))

        # Forward selection (raises when n equals the total feature count).
        try:
            sfs = SequentialFeatureSelector(
                LinearRegression(),
                n_features_to_select=n,
                direction='forward',
                cv=tscv,
                n_jobs=-1,
            )
            scores = _score(sfs.fit_transform(X_scaled_df, y_scaled))
        except Exception as e:
            print(f"Forward Selection failed for n={n}: {e}")
            scores = failed
        results['Forward'].append((n, *scores))

        # Random forest: top-n features by importance.
        idx = np.argsort(importances)[-n:]
        results['RandomForest'].append((n, *_score(X_scaled_df.iloc[:, idx])))

    # One frame per method, joined side by side on the subset size.
    df_combined = None
    for method, vals in results.items():
        method_df = pd.DataFrame(
            vals,
            columns=['n_features', f'{method}_RMSE', f'{method}_MAPE', f'{method}_R2'],
        )
        if df_combined is None:
            df_combined = method_df
        else:
            df_combined = df_combined.merge(method_df, on='n_features', how='outer')
    return df_combined
# Assuming df_lagged is available and contains the data with lags
# Assuming target_cols is defined
# Targets to evaluate; `results` maps each target to its metrics DataFrame.
target_cols = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']
results = {}

for target in target_cols:
    # Guard: the lagged frame must exist in this session and contain the target.
    if 'df_lagged' not in locals() or target not in df_lagged.columns:
        print(f"df_lagged or target column '{target}' not found. Skipping evaluation for this target.")
        continue

    # Features = everything except the target and those of its own
    # lag-1 / lag-2 columns that are actually present.
    lag_cols = [f'{target}_lag1', f'{target}_lag2']
    cols_to_drop = [target, *(col for col in lag_cols if col in df_lagged.columns)]
    X = df_lagged.drop(columns=cols_to_drop)

    # Keep only the rows where the target is known, and align X to them.
    y = df_lagged[target].dropna()
    X = X.loc[y.index]

    if X.empty:
        print(f"No valid data points after dropping NaNs for target: {target}. Skipping evaluation.")
        results[target] = pd.DataFrame()  # placeholder so every target has an entry
        continue

    print(f"\n🔍 Evaluating for target: {target}")
    df_metrics = find_best_features_with_metrics(X, y)
    results[target] = df_metrics

    # Only plot when the evaluation produced something.
    if df_metrics.empty:
        print(f"No metrics to plot for target: {target}.")
    else:
        plot_metrics(df_metrics, target)
🔍 Evaluating for target: Life expectancy
🔍 Evaluating for target: Cardiovascular diseases
/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.3344987559357833, tolerance: 1.2543280301043949 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.3041213861906726, tolerance: 1.2543280301043949 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.2939728821220342, tolerance: 1.2543280301043949 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.291560398247384, tolerance: 1.2543280301043949 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.2833807594306563, tolerance: 1.2543280301043949 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.2738431218404003, tolerance: 1.2543280301043949 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 1.263577498113591, tolerance: 1.2543280301043949 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.703431344112687, tolerance: 1.5837940758962923 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.6969510646304116, tolerance: 1.5837940758962923 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.6853641250145301, tolerance: 1.5837940758962923 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.6726185397310473, tolerance: 1.5837940758962923 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.6593221832208656, tolerance: 1.5837940758962923 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 1.6457220310112461, tolerance: 1.5837940758962923 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.631997326414421, tolerance: 1.5837940758962923 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.618292309503886, tolerance: 1.5837940758962923 model = cd_fast.enet_coordinate_descent_gram( /usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_coordinate_descent.py:681: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 1.6047235924033885, tolerance: 1.5837940758962923 model = cd_fast.enet_coordinate_descent_gram(
🔍 Evaluating for target: Diabetes
Comparative Summary Table - Feature Selection with metrics (RMSE, MAPE, and R²)¶
## The best Feature Selection with different metrics TABLE - OK - REVISED
# Install tabulate if needed
!pip install tabulate
from sklearn.linear_model import Ridge, LassoCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SequentialFeatureSelector, RFE
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate
import pandas as pd
import numpy as np
# Main function with MAE instead of MAPE
def quick_metrics_summary(X_df, y, feature_step=3, max_features=15):
    """Compare four feature-selection methods on a coarse grid of subset sizes.

    Features and target are standardised. For each subset size
    ``n = 1, 1 + feature_step, ...`` up to ``max_features`` the top-``n``
    LASSO features, the top-``n`` random-forest features, a forward
    ``SequentialFeatureSelector`` subset and an ``RFE`` subset are scored
    with a ``Ridge`` model under a 3-split ``TimeSeriesSplit``. Predictions
    are mapped back to the original target scale before scoring.

    Args:
        X_df: DataFrame (or 2-D array-like) of candidate features.
        y: pandas Series with the target, aligned row-for-row with ``X_df``.
        feature_step: Increment between successive subset sizes.
        max_features: Largest subset size to try. Capped at the number of
            available features so a row never reports more features than
            were actually used.

    Returns:
        DataFrame with the columns ``Method``, ``n_features``, ``RMSE``,
        ``MAE`` and ``R²``; four rows (one per method) per subset size.
        A method that fails for a given size is reported via ``print`` and
        gets NaN metrics.
    """
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_df)
    y = y.values.reshape(-1, 1)
    y_scaler = StandardScaler().fit(y)
    y_scaled = y_scaler.transform(y).ravel()
    y_original = y.ravel()
    tscv = TimeSeriesSplit(n_splits=3)

    # Asking for more features than exist would only repeat the full set
    # under a misleading n_features label.
    max_features = min(max_features, X_scaled.shape[1])

    def evaluate(X_subset):
        """Return (RMSE, MAE, R²) of Ridge over the pooled CV test folds."""
        y_preds, y_tests = [], []
        for train_idx, test_idx in tscv.split(X_subset):
            model = Ridge()
            model.fit(X_subset[train_idx], y_scaled[train_idx])
            pred = model.predict(X_subset[test_idx])
            y_pred = y_scaler.inverse_transform(pred.reshape(-1, 1)).ravel()
            y_preds.extend(y_pred)
            y_tests.extend(y_original[test_idx])
        return (
            np.sqrt(mean_squared_error(y_tests, y_preds)),
            mean_absolute_error(y_tests, y_preds),
            r2_score(y_tests, y_preds)
        )

    def _row(method, n, scores):
        """Build one result record from a (rmse, mae, r2) triple."""
        rmse, mae, r2 = scores
        return {'Method': method, 'n_features': n, 'RMSE': rmse, 'MAE': mae, 'R²': r2}

    failed = (np.nan, np.nan, np.nan)

    lasso = LassoCV(cv=tscv, random_state=42).fit(X_scaled, y_scaled)
    rf = RandomForestRegressor(n_estimators=50, random_state=42).fit(X_scaled, y_scaled)

    results = []
    for n in range(1, max_features + 1, feature_step):
        # LASSO: top-n features by absolute coefficient.
        idx_lasso = np.argsort(np.abs(lasso.coef_))[-n:]
        results.append(_row('LASSO', n, evaluate(X_scaled[:, idx_lasso])))

        # Random Forest: top-n features by importance.
        idx_rf = np.argsort(rf.feature_importances_)[-n:]
        results.append(_row('RandomForest', n, evaluate(X_scaled[:, idx_rf])))

        # Forward Selection. Catch Exception (not a bare except) so Ctrl-C
        # still interrupts the run, and say why the step was skipped.
        try:
            sfs = SequentialFeatureSelector(Ridge(), n_features_to_select=n, direction='forward', cv=tscv, n_jobs=-1)
            scores = evaluate(sfs.fit_transform(X_scaled, y_scaled))
        except Exception as exc:
            print(f"Forward Selection failed for n={n}: {exc}")
            scores = failed
        results.append(_row('Forward', n, scores))

        # RFE
        try:
            rfe = RFE(estimator=Ridge(), n_features_to_select=n)
            scores = evaluate(rfe.fit_transform(X_scaled, y_scaled))
        except Exception as exc:
            print(f"RFE failed for n={n}: {exc}")
            scores = failed
        results.append(_row('RFE', n, scores))

    return pd.DataFrame(results)
# Loop over targets
# Per-target summary tables, keyed by target name.
results_dict = {}
for target in ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']:
    # NOTE(review): the column names come from df_lagged while the data is
    # read from df_combined; this relies on df_combined containing every
    # df_lagged column - TODO confirm that mixing the two frames is intended.
    features = [col for col in df_lagged.columns if not col == target]
    X = df_combined.loc[:, features]
    y = df_combined.loc[:, target]

    print(f"\nRunning for: {target}")
    df_summary = quick_metrics_summary(X, y, feature_step=3, max_features=15)
    results_dict[target] = df_summary
# Extract Best Results per Method
def extract_best_per_method(results_dict):
    """Pick, for every target and method, the row with the lowest RMSE.

    Args:
        results_dict: Mapping of target name to a DataFrame with the columns
            ``Method``, ``n_features``, ``RMSE``, ``MAE`` and ``R²``.

    Returns:
        DataFrame with one row per (target, method) pair that has at least
        one NaN-free row. Columns: ``Target``, ``Method``, ``n_features``
        (int), ``RMSE`` and ``MAE`` rounded to 2 decimals, ``R²`` rounded
        to 4 decimals.
    """
    method_order = ('LASSO', 'RandomForest', 'Forward', 'RFE')
    rounding = (('RMSE', 2), ('MAE', 2), ('R²', 4))

    summary = []
    for target, df in results_dict.items():
        for method in method_order:
            # Rows of this method with every value present.
            candidates = df.loc[df['Method'] == method].dropna()
            if candidates.empty:
                continue

            winner = candidates.sort_values('RMSE').iloc[0]
            record = {
                'Target': target,
                'Method': method,
                'n_features': int(winner['n_features']),
            }
            for column, digits in rounding:
                record[column] = round(winner[column], digits)
            summary.append(record)

    return pd.DataFrame(summary)
# Print Final Table
# Reduce the per-target results to the best row per method, then render
# that table as a boxed text grid without the DataFrame index.
best_performance_df = extract_best_per_method(results_dict)
print("\nBest Performance per Method\n")
print(
    tabulate(
        best_performance_df,
        headers='keys',
        tablefmt='fancy_grid',
        showindex=False,
    )
)
Requirement already satisfied: tabulate in /usr/local/lib/python3.11/dist-packages (0.9.0) Running for: Life expectancy Running for: Cardiovascular diseases Running for: Diabetes Best Performance per Method ╒═════════════════════════╤══════════════╤══════════════╤════════╤═══════╤═════════╕ │ Target │ Method │ n_features │ RMSE │ MAE │ R² │ ╞═════════════════════════╪══════════════╪══════════════╪════════╪═══════╪═════════╡ │ Life expectancy │ LASSO │ 13 │ 4.75 │ 3.14 │ 0.8331 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Life expectancy │ RandomForest │ 4 │ 4.78 │ 3.16 │ 0.8308 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Life expectancy │ Forward │ 13 │ 4.74 │ 3.15 │ 0.8336 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Life expectancy │ RFE │ 13 │ 4.76 │ 3.15 │ 0.8323 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Cardiovascular diseases │ LASSO │ 1 │ 142.5 │ 57.25 │ -0.0266 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Cardiovascular diseases │ RandomForest │ 1 │ 142.58 │ 57.59 │ -0.0277 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Cardiovascular diseases │ Forward │ 1 │ 142.03 │ 57.55 │ -0.0198 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Cardiovascular diseases │ RFE │ 1 │ 142.35 │ 57.54 │ -0.0244 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Diabetes │ LASSO │ 7 │ 3.91 │ 2.72 │ 0.4239 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Diabetes │ RandomForest │ 4 │ 3.94 │ 2.7 │ 0.4158 │ ├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Diabetes │ Forward │ 13 │ 3.86 │ 2.69 │ 0.4381 │ 
├─────────────────────────┼──────────────┼──────────────┼────────┼───────┼─────────┤ │ Diabetes │ RFE │ 13 │ 3.89 │ 2.72 │ 0.4291 │ ╘═════════════════════════╧══════════════╧══════════════╧════════╧═══════╧═════════╛
Based on the results in the table, the following feature selection method and number of features will be used for each target in this study:
Life Expectancy - Forward Selection - # of features = 13
Cardiovascular Diseases - Forward Selection - # of features = 1
Diabetes - Forward Selection - # of features = 13
# # Plot The best Feature Selection with different metrics
import seaborn as sns
import matplotlib.pyplot as plt
# Step 1: Turn each metric into a 0-1 "goodness" score within each target.
# The targets live on very different scales (e.g. RMSE around 140 for one
# target and around 4 for another), so a min-max over all rows pooled would
# mostly encode which target a row belongs to rather than how well the
# method performed.
def _minmax_score(values, higher_is_better):
    """Min-max scale one target's metric values so that 1 is best, 0 is worst.

    When all values are identical (or the range is undefined) every entry
    gets the neutral score 0.5 instead of dividing by zero.
    """
    low, high = values.min(), values.max()
    span = high - low
    if not span > 0:
        return pd.Series(0.5, index=values.index)
    scaled = (values - low) / span
    # RMSE / MAE: lower is better, so flip the scale.
    return scaled if higher_is_better else 1 - scaled


normalized_df = best_performance_df.copy()
for metric in ['RMSE', 'MAE', 'R²']:
    higher_is_better = metric == 'R²'
    normalized_df[metric + '_norm'] = (
        normalized_df.groupby('Target')[metric]
        .transform(lambda s, hib=higher_is_better: _minmax_score(s, hib))
    )

# Step 2: Melt for plotting
plot_df = normalized_df.melt(
    id_vars=['Target', 'Method', 'n_features'],
    value_vars=['RMSE_norm', 'MAE_norm', 'R²_norm'],
    var_name='Metric',
    value_name='Normalized Score'
)

# Step 3: Plot all metrics in a grouped bar chart
plt.figure(figsize=(12, 6))
sns.barplot(
    data=plot_df,
    x='Method',
    y='Normalized Score',
    hue='Metric',
    palette='Set2',
    dodge=True,
    edgecolor=None,  # removes black edge lines
    linewidth=0      # makes sure border width is zero
)

# Strip any remaining outline from the bars drawn above.
for patch in plt.gca().patches:
    patch.set_edgecolor('none')
    patch.set_linewidth(0)
    patch.set_capstyle('round')  # Smooth edges

plt.title('Normalized Model Performance Across Metrics', fontsize=16)
plt.xlabel('Feature Selection Method')
plt.ylabel('Normalized Score (0=worst, 1=best)')
plt.xticks(rotation=30)
plt.legend(title='Metric')
plt.grid(axis='y', linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()
Feature Selection¶
Feature Selection¶
Target Variables: Life Expectancy, Diabetes and Cardiovascular disease
# Forward Selection - Life Expectancy, Diabetes and Cardiovascular disease
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
# === Target Feature Limits
# Number of features to select for each target.
targets = {
    'Life expectancy': 13,
    'Diabetes': 13,
    'Cardiovascular diseases': 1
}

# remove unwanted column
if 'lagged' in df_lagged.columns:
    df_lagged = df_lagged.drop(columns='lagged')

# === Prepare excluded columns
# No target, and no lag of any target, may be used as a predictor.
all_target_keywords = list(targets)
lag_prefixes = tuple(f"{t}_lag" for t in all_target_keywords)
excluded_cols = [
    col for col in df_lagged.columns
    if col in all_target_keywords or col.startswith(lag_prefixes)
]

# === Forward Feature Selection Loop
for target, max_features in targets.items():
    print(f"\n🎯 Target: {target}")

    if target not in df_lagged.columns:
        print(f"⚠️ Target '{target}' not found in dataset columns.")
        continue

    # Candidate predictors plus the target, keeping only complete rows.
    X_raw = df_lagged.drop(columns=excluded_cols)
    df_temp = X_raw.assign(**{target: df_lagged[target]}).dropna()

    # Separate features and target
    X_raw = df_temp.drop(columns=[target])
    y = df_temp[target]

    # StandardScaler cannot be fitted on zero rows or zero columns.
    if X_raw.empty:
        print(f"⚠️ No complete rows or no candidate features for '{target}'. Skipping.")
        continue

    scaler = StandardScaler()
    # Keep the original row index so X_scaled stays aligned with y.
    X_scaled = pd.DataFrame(scaler.fit_transform(X_raw),
                            columns=X_raw.columns, index=X_raw.index)

    remaining_features = list(X_scaled.columns)
    selected_features = []
    best_rmse = np.inf

    # NOTE(review): cv=5 is an unshuffled K-fold here, whereas the earlier
    # comparison cells use TimeSeriesSplit - confirm which one is intended.
    # Never run more steps than there are candidate features, otherwise
    # min() below would be called on an empty dict.
    for i in range(min(max_features, len(remaining_features))):
        rmse_per_feature = {}
        for feature in remaining_features:
            trial_features = selected_features + [feature]
            model = LinearRegression()
            neg_mse = cross_val_score(model, X_scaled[trial_features], y,
                                      scoring='neg_mean_squared_error', cv=5)
            rmse_per_feature[feature] = np.mean(np.sqrt(-neg_mse))

        # Greedy step: add the candidate that gives the lowest mean fold RMSE.
        best_feature = min(rmse_per_feature, key=rmse_per_feature.get)
        selected_features.append(best_feature)
        remaining_features.remove(best_feature)
        best_rmse = rmse_per_feature[best_feature]

    # Final evaluation of the selected subset.
    final_model = LinearRegression()
    final_rmse = np.mean(np.sqrt(-cross_val_score(final_model, X_scaled[selected_features], y,
                                                  scoring='neg_mean_squared_error', cv=5)))

    print(f"\n Top {len(selected_features)} features for {target}:")
    print(selected_features)
    print(f"Final Cross-Validated RMSE: {final_rmse:.4f}")
🎯 Target: Life expectancy Top 13 features for Life expectancy: ['Child mortality rate', 'Child mortality rate_lag3', 'Sex ratio_lag1', 'BMI_avg_lag3', 'GDP', 'Incomplete tertiary education_lag3', 'Child mortality rate_lag2', 'Sex ratio_lag3', 'Sex ratio', 'Incomplete tertiary education', 'Median age_lag3', 'Sex ratio_lag2', 'Incomplete tertiary education_lag2'] Final Cross-Validated RMSE: 4.2686 🎯 Target: Diabetes Top 13 features for Diabetes: ['BMI_avg_lag3', 'Income', 'GDP', 'Sex ratio_lag3', 'CPI_lag3', 'BMI_avg', 'Sex ratio', 'Income_lag1', 'Sex ratio_lag1', 'BMI_avg_lag1', 'BMI_avg_lag2', 'Sex ratio_lag2', 'GDP_lag1'] Final Cross-Validated RMSE: 3.5154 🎯 Target: Cardiovascular diseases Top 1 features for Cardiovascular diseases: ['BMI_avg'] Final Cross-Validated RMSE: 132.1827
The features have been selected for target variables as follows:
- Life expectancy: ['Child mortality rate', 'Child mortality rate_lag3', 'Sex ratio_lag1', 'BMI_avg_lag3', 'GDP', 'Incomplete tertiary education_lag3', 'Child mortality rate_lag2', 'Sex ratio_lag3', 'Sex ratio', 'Incomplete tertiary education', 'Median age_lag3', 'Sex ratio_lag2', 'Incomplete tertiary education_lag2']
- Diabetes: ['BMI_avg_lag3', 'Income', 'GDP', 'Sex ratio_lag3', 'CPI_lag3', 'BMI_avg', 'Sex ratio', 'Income_lag1', 'Sex ratio_lag1', 'BMI_avg_lag1', 'BMI_avg_lag2', 'Sex ratio_lag2', 'GDP_lag1']
- Cardiovascular diseases: ['BMI_avg']
Feature Importance¶
Feature importance quantifies how useful or valuable each feature (independent variable) is in predicting the target variable in a model.
Feature importance refers to a technique used to quantify how much each independent variable contributes to predicting the target variable in a machine learning model.
Depending on the type of model, feature importance can be measured in different ways. For example, in linear regression, it is typically based on the absolute value of the model coefficients, while in models like Random Forests, it reflects how much each feature reduces impurity across all decision trees.
In this project, which analyzes global health and economic indicators to forecast outcomes such as life expectancy, cardiovascular disease rates, and diabetes, using feature importance provides several advantages. First, it enhances interpretability by identifying which variables are most influential in driving the predictions, which can be valuable for policy recommendations or academic insights. Second, it supports feature selection by helping decide which features to keep, remove, or further engineer, thereby simplifying the models and reducing the risk of overfitting. Third, it offers practical domain insights by revealing which socio-economic or health-related factors are most critical over time. Lastly, focusing on the most important features can improve forecasting accuracy and model generalization. Overall, incorporating feature importance strengthens the pipeline by making the models not only predictive but also explainable and actionable.
# Feature importance table
import pandas as pd
import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
# === Targets and Features
# === Targets and Features
targets = ['Cardiovascular diseases', 'Diabetes', 'Life expectancy']
all_features = [
    'Child mortality rate', 'BMI_avg_lag3', 'GDP', 'Gini coefficient_lag3',
    'Median age', 'Gini coefficient', 'Incomplete tertiary education', 'Income',
    'Median age_lag3', 'Sex ratio', 'Income_lag1', 'Income_lag2', 'Income_lag3',
    'Inflation', 'Sex ratio_lag3', 'Gini coefficient_lag2',
    'Incomplete tertiary education_lag3', 'Sex ratio_lag1', 'Sex ratio_lag2'
]

# === Initialize importance table
# Build the frame as float zeros directly; creating an empty (object-dtype)
# frame and calling fillna(0.0) depends on pandas silently downcasting it.
importance_table = pd.DataFrame(0.0, index=all_features, columns=targets)

# Features actually present in df_lagged (the same for every target).
valid_features = [f for f in all_features if f in df_lagged.columns]

# === Run LassoCV for each target
for target in targets:
    if target not in df_lagged.columns:
        print(f"⚠️ Skipping - Target '{target}' not found in df_lagged.")
        continue

    # Drop NA rows for selected features + target
    df_temp = df_lagged[valid_features + [target]].dropna()
    X = df_temp[valid_features]
    y = df_temp[target]

    # Standardize features so coefficient magnitudes are comparable.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Fit LassoCV
    lasso = LassoCV(cv=5, random_state=42)
    lasso.fit(X_scaled, y)

    # Store absolute coefficients; coef_ follows the column order of X.
    # Features missing from df_lagged keep their initial 0.0.
    importance_table.loc[valid_features, target] = np.abs(lasso.coef_).round(4)

# === Sort table by Life Expectancy (optional)
importance_table = importance_table.sort_values(by='Life expectancy', ascending=False)

# === Style the table with only borders
styled_table = importance_table.style \
    .set_table_styles([
        {'selector': 'table',
         'props': [('border-collapse', 'collapse'),
                   ('border', '1px solid black')]},
        {'selector': 'th, td',
         'props': [('border', '1px solid black'),
                   ('padding', '5px')]}
    ]) \
    .set_caption("📋 Feature Importance Summary Table (Lasso)") \
    .format(precision=4)

# === Display
display(styled_table)

# export
importance_table.to_csv("feature_importance_summary.csv")
| Cardiovascular diseases | Diabetes | Life expectancy | |
|---|---|---|---|
| Child mortality rate | 0.0000 | 0.1964 | 10.6372 |
| Median age_lag3 | 0.0000 | 0.0000 | 2.6116 |
| Median age | 0.0000 | 0.0670 | 2.1959 |
| Sex ratio | 0.0000 | 0.1312 | 0.3119 |
| BMI_avg_lag3 | 4.6601 | 3.2914 | 0.3008 |
| GDP | 2.1204 | 0.2911 | 0.2863 |
| Incomplete tertiary education_lag3 | 0.0000 | 0.0000 | 0.2037 |
| Sex ratio_lag3 | 0.0000 | 0.2236 | 0.1862 |
| Income | 0.0000 | 0.8189 | 0.1382 |
| Sex ratio_lag2 | 0.0000 | 0.0210 | 0.1343 |
| Inflation | 0.0000 | 0.0995 | 0.1337 |
| Sex ratio_lag1 | 0.0000 | 0.0335 | 0.0366 |
| Gini coefficient | 0.0000 | 0.0000 | 0.0185 |
| Gini coefficient_lag3 | 1.3903 | 0.0000 | 0.0000 |
| Incomplete tertiary education | 0.0000 | 0.1957 | 0.0000 |
| Income_lag1 | 0.0000 | 0.0670 | 0.0000 |
| Income_lag3 | 0.0000 | 0.3742 | 0.0000 |
| Income_lag2 | 0.0000 | 0.0693 | 0.0000 |
| Gini coefficient_lag2 | 0.0000 | 0.0000 | 0.0000 |
Feature Importance Plot¶
#import matplotlib.pyplot as plt
import pandas as pd
# === Feature importance summary data
# Hard-coded importance scores, one list entry per feature, in the same order
# as the 'Feature' list.
data = {
    'Feature': [
        'Child mortality rate', 'BMI_avg_lag3', 'GDP', 'Gini coefficient_lag3',
        'Median age', 'Gini coefficient', 'Incomplete tertiary education', 'Income',
        'Median age_lag3', 'Sex ratio', 'Income_lag1', 'Income_lag2', 'Income_lag3',
        'Inflation', 'Sex ratio_lag3', 'Gini coefficient_lag2',
        'Incomplete tertiary education_lag3', 'Sex ratio_lag1', 'Sex ratio_lag2'
    ],
    'Cardiovascular diseases': [
        0.0000, 0.0778, 0.6041, 0.2821,
        0.0000, 0.0000, 0.0000, 0.0000,
        0.0360, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000
    ],
    'Diabetes': [
        0.0236, 0.6894, 0.0000, 0.0000,
        0.0503, 0.0524, 0.0466, 0.0490,
        0.0000, 0.0229, 0.0270, 0.0214, 0.0175,
        0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000
    ],
    'Life expectancy': [
        0.9287, 0.0000, 0.0081, 0.0040,
        0.0129, 0.0051, 0.0049, 0.0000,
        0.0000, 0.0058, 0.0000, 0.0000, 0.0000,
        0.0080, 0.0057, 0.0044,
        0.0043, 0.0042, 0.0038
    ]
}

# === Create DataFrame indexed by feature name
df_importance = pd.DataFrame(data)
df_importance = df_importance.set_index('Feature')

# === Optional: keep only features that are non-zero for at least one target
has_any_importance = df_importance.ne(0).any(axis=1)
df_importance = df_importance[has_any_importance]

# === Plotting: one horizontal bar chart per target, stacked vertically
targets = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']
colors = ['#2ca02c', '#1f77b4', '#ff7f0e']
fig, axes = plt.subplots(nrows=3, figsize=(10, 12), sharex=True)

for ax, target, bar_color in zip(axes, targets, colors):
    # Ascending sort puts the largest bar at the top of a barh chart
    ranked = df_importance[target].sort_values(ascending=True)
    ranked.plot(kind='barh', ax=ax, color=bar_color)
    ax.set_title(f'Feature Importance for {target}', fontsize=12)
    ax.set_xlabel('Importance')
    ax.set_ylabel('Feature')
    ax.grid(True, linestyle='--', alpha=0.4)
    ax.set_xlim(left=0)

plt.tight_layout()
plt.show()
Result of feature importance:
🔹 Life Expectancy The most influential feature remains Child mortality rate with a commanding importance score of 0.9287, reinforcing its role as the primary determinant of overall lifespan across populations. Secondary contributors include Median age (0.0129), Inflation (0.0080), and a collection of demographic and inequality-related indicators — such as Gini coefficient (0.0051), Incomplete tertiary education (0.0049), and Sex ratio_lag3 (0.0057). While individually modest, these features collectively suggest nuanced social and economic influences on life expectancy. Several other lagged features — including education and sex ratio metrics — offer additional but subtle predictive value (ranging from 0.0043 to 0.0038).
🔹 Diabetes Here, BMI_avg_lag3 dominates the landscape with an importance score of 0.6894, indicating that historical body mass trends are the strongest driver of diabetes outcomes. A suite of socioeconomic and demographic variables follows: Gini coefficient (0.0524), Median age (0.0503), Income (0.0490), and Incomplete tertiary education (0.0466), each providing meaningful predictive contribution. Additional lagged indicators like Income_lag1 (0.0270), Income_lag2 (0.0214), and Income_lag3 (0.0175) reinforce the relevance of financial conditions over time, while smaller signals such as Child mortality rate (0.0236) and Sex ratio (0.0229) reveal latent health and gender dynamics.
🔹 Cardiovascular Diseases The most prominent predictor is again GDP (0.6041), signaling a strong economic dimension in cardiovascular disease outcomes, likely tied to healthcare access and lifestyle resources. Gini coefficient_lag3 (0.2821) highlights the added impact of income inequality across time, while BMI_avg_lag3 (0.0778) brings in the physiological lens. Median age_lag3 (0.0360) also contributes modestly, suggesting that demographic aging trends can influence disease prevalence. Other features — including Child mortality rate, sex ratios, income levels, and educational indicators — showed negligible impact in this model, reinforcing a more concentrated predictor structure.
Residual diagnostics (heteroscedasticity, autocorrelation)¶
Residual diagnostics and the ADF (Augmented Dickey-Fuller) test are important tools in time series modeling that help ensure the models are valid, interpretable, and produce reliable forecasts.
Residual diagnostics involve analyzing the residuals, i.e., the differences between the actual values and the values predicted by the model. These diagnostics test whether the model assumptions hold, particularly in regression or forecasting models. For example, the Breusch-Pagan test checks for heteroscedasticity, which is when the variance of the residuals is not constant. Constant variance is a key assumption in linear regression; if it is violated, the coefficient estimates become inefficient and the usual standard errors (and therefore p-values and confidence intervals) become unreliable. Similarly, the Ljung-Box test assesses whether residuals are autocorrelated, which means they are correlated across time. If residuals show autocorrelation, the model has likely failed to capture some time-based structure in the data, indicating that it is underfitting or misspecified. Performing these diagnostics ensures that the model is statistically sound and that the insights or forecasts it provides are trustworthy.
# Residual Diagnostics
import statsmodels.api as sm
from statsmodels.stats.diagnostic import het_breuschpagan, acorr_ljungbox
# Function to run diagnostics
def _fit_ols_with_constant(X, y):
    """Fit OLS of ``y`` on ``X`` plus an intercept column.

    Returns a ``(fitted_results, X_const)`` tuple, where ``X_const`` is ``X``
    with the constant column added by ``sm.add_constant``.
    """
    X_const = sm.add_constant(X)
    return sm.OLS(y, X_const).fit(), X_const


def _residual_tests(residuals, X_const):
    """Run the Breusch-Pagan and Ljung-Box (lag 10) tests on ``residuals``.

    ``X_const`` is the design matrix (including the constant) that produced
    the residuals; the Breusch-Pagan test uses it as its regressors.
    """
    # Breusch-Pagan Test for Heteroscedasticity
    bp_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
    bp_results = dict(zip(bp_labels, het_breuschpagan(residuals, X_const)))

    # Ljung-Box Test for Autocorrelation (lag = 10)
    lb_test = acorr_ljungbox(residuals, lags=[10], return_df=True)
    lb_pvalue = lb_test['lb_pvalue'].iloc[0]

    return {
        "Breusch-Pagan (Heteroscedasticity)": bp_results,
        "Ljung-Box p-value (Autocorrelation, lag=10)": lb_pvalue
    }


# Function to run diagnostics
def residual_diagnostics(X, y):
    """Fit OLS of ``y`` on ``X`` (with intercept) and test its residuals.

    Returns a dict with two entries:
      * "Breusch-Pagan (Heteroscedasticity)": dict of the four Breusch-Pagan
        outputs (LM statistic, its p-value, F statistic, its p-value).
      * "Ljung-Box p-value (Autocorrelation, lag=10)": the Ljung-Box p-value
        at lag 10.
    """
    model, X_const = _fit_ols_with_constant(X, y)
    return _residual_tests(model.resid, X_const)


# Define target columns
target_cols = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']

# Predictors are every column except the targets. This is the same for all
# targets, so it is built once rather than on each iteration.
X = df_lagged.drop(columns=target_cols)

# Loop through each target and run diagnostics
for target_col in target_cols:
    print(f"\n=== Residual Diagnostics for: {target_col} ===")

    y = df_lagged[target_col]

    # Fit once and reuse the same fit for both the tests and the residual
    # summary (previously the identical model was fitted twice per target).
    model, X_const = _fit_ols_with_constant(X, y)
    residuals = model.resid
    results = _residual_tests(residuals, X_const)

    # Print results
    for test_name, test_result in results.items():
        print(f"{test_name}: {test_result}")

    # Residual summary for current target
    print(f"Mean of residuals: {residuals.mean():.4f}")
    print(f"Variance of residuals: {residuals.var():.4f}")
=== Residual Diagnostics for: Life expectancy ===
Breusch-Pagan (Heteroscedasticity): {'Lagrange multiplier statistic': np.float64(764.4477255858551), 'p-value': np.float64(1.0232367203146819e-129), 'f-value': np.float64(22.26162116493993), 'f p-value': np.float64(4.0766328819801317e-141)}
Ljung-Box p-value (Autocorrelation, lag=10): 0.0
Mean of residuals: 0.0450
Variance of residuals: 17.6104
=== Residual Diagnostics for: Cardiovascular diseases ===
Breusch-Pagan (Heteroscedasticity): {'Lagrange multiplier statistic': np.float64(4099.901314129021), 'p-value': np.float64(0.0), 'f-value': np.float64(153.41325149630802), 'f p-value': np.float64(0.0)}
Ljung-Box p-value (Autocorrelation, lag=10): 0.0
Mean of residuals: -0.1215
Variance of residuals: 19725.8045
=== Residual Diagnostics for: Diabetes ===
Breusch-Pagan (Heteroscedasticity): {'Lagrange multiplier statistic': np.float64(1618.971210445285), 'p-value': np.float64(8.583932328752581e-308), 'f-value': np.float64(49.98610858671592), 'f p-value': np.float64(0.0)}
Ljung-Box p-value (Autocorrelation, lag=10): 0.0
Mean of residuals: -0.0182
Variance of residuals: 11.5035
The residual diagnostics for the regression models on Life Expectancy, Cardiovascular Diseases, and Diabetes reveal key statistical concerns in error behavior. For all three models, the Breusch–Pagan test results indicate strong evidence of heteroscedasticity, with extremely low p-values (e.g., 1.02e−129 for Life Expectancy and effectively zero for the others), confirming that the variance of residuals is not constant and likely depends on the fitted values or predictors. Additionally, the Ljung–Box p-value of 0.0 across all targets points to significant autocorrelation in the residuals at lag 10, meaning the errors are temporally correlated rather than randomly distributed — a violation of classical regression assumptions. The mean residuals for Life Expectancy (0.0450) and Diabetes (−0.0182) are relatively close to zero, suggesting no major bias, while the mean for Cardiovascular Diseases (−0.1215) is slightly more skewed. However, the variance of residuals varies widely: Life Expectancy shows relatively low dispersion (17.6104), Diabetes appears moderately stable (11.5035), but Cardiovascular Diseases displays extremely high residual variance (19725.8045), indicating serious inconsistency in model predictions. Together, these diagnostics highlight potential shortcomings in model specification or data treatment — suggesting the need for techniques that address autocorrelation and non-constant variance, such as robust standard errors, residual modeling, or time-series-specific approaches.
The residual diagnostics indicate that the models' residuals exhibit both heteroscedasticity and autocorrelation, which violate the ordinary least squares (OLS) assumptions of constant variance and independent residuals.
To address this problem, we use robust standard errors (Heteroskedasticity- and Autocorrelation-Consistent, or HAC, standard errors), which account for both heteroscedasticity and autocorrelation in the variance-covariance matrix.
# HAC
import statsmodels.api as sm
import matplotlib.pyplot as plt
import pandas as pd # Import pandas
# Define target columns
target_cols = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']

# df_lagged must already exist from an earlier cell.
# Predictors: every column of df_lagged except the three targets (all targets
# are excluded, not just the current one). The design matrix is therefore the
# same for every target and is built once, outside the loop.
X = df_lagged.drop(columns=target_cols)
# Add constant term to the model (intercept)
X_const = sm.add_constant(X)

for target in target_cols:
    print(f"\n=== Newey-West Adjusted OLS Results for: {target} ===")

    y = df_lagged[target]

    # Fit OLS model
    model = sm.OLS(y, X_const).fit()

    # Apply Newey-West (HAC) standard errors with a fixed window of 5 lags.
    # NOTE(review): if df_lagged stacks several countries, the lag window also
    # spans country boundaries — confirm this is intended.
    # The broad except is deliberate best-effort: a failure for one target is
    # reported and the remaining targets are still processed.
    try:
        nw = model.get_robustcov_results(cov_type='HAC', maxlags=5)

        # Display results
        print(nw.summary())

        # Residual plot for the current target
        residuals = nw.resid
        plt.figure(figsize=(10, 4))
        plt.plot(residuals)
        plt.title(f'Residuals over time ({target})')
        plt.xlabel('Observation Index')
        plt.ylabel('Residual')
        plt.grid(True)
        plt.show()
    except Exception as e:
        print(f"Could not fit HAC model or plot residuals for {target}: {e}")
=== Newey-West Adjusted OLS Results for: Life expectancy ===
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 48, but rank is 38
warnings.warn('covariance of constraints does not have full '
OLS Regression Results
==============================================================================
Dep. Variable: Life expectancy R-squared: 0.867
Model: OLS Adj. R-squared: 0.866
Method: Least Squares F-statistic: 2.541e+04
Date: Sat, 12 Jul 2025 Prob (F-statistic): 0.00
Time: 12:55:44 Log-Likelihood: -45098.
No. Observations: 15806 AIC: 9.027e+04
Df Residuals: 15769 BIC: 9.055e+04
Df Model: 36
Covariance Type: HAC
======================================================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------------------------------
const -0.0031 0.001 -3.745 0.000 -0.005 -0.001
Cost of a healthy diet 0.1279 0.021 6.130 0.000 0.087 0.169
Income 0.0172 0.009 2.015 0.044 0.000 0.034
Inflation -9.053e-05 6.55e-05 -1.382 0.167 -0.000 3.79e-05
Child mortality rate -1.1023 0.114 -9.669 0.000 -1.326 -0.879
Unemployment Rate -0.0106 0.023 -0.464 0.643 -0.056 0.034
Incomplete tertiary education 0.2981 0.063 4.738 0.000 0.175 0.421
Gini coefficient -0.5448 0.119 -4.578 0.000 -0.778 -0.312
Sex ratio 0.2475 0.070 3.558 0.000 0.111 0.384
GDP 1.147e-14 6.56e-15 1.748 0.080 -1.39e-15 2.43e-14
Median age 2.7025 0.393 6.874 0.000 1.932 3.473
CPI 3.895e-05 3.18e-05 1.223 0.221 -2.35e-05 0.000
BMI_avg 0.0401 0.047 0.849 0.396 -0.052 0.133
Cost of a healthy diet_lag1 0.1291 0.021 6.160 0.000 0.088 0.170
Cost of a healthy diet_lag2 0.1288 0.021 6.150 0.000 0.088 0.170
Cost of a healthy diet_lag3 0.1293 0.021 6.168 0.000 0.088 0.170
Income_lag1 -0.0010 0.005 -0.196 0.845 -0.011 0.009
Income_lag2 0.0021 0.005 0.433 0.665 -0.007 0.012
Income_lag3 -0.0069 0.008 -0.820 0.412 -0.023 0.010
Inflation_lag1 -8.712e-05 6.61e-05 -1.319 0.187 -0.000 4.24e-05
Inflation_lag2 -0.0001 7.3e-05 -1.433 0.152 -0.000 3.84e-05
Inflation_lag3 -8.075e-05 8.33e-05 -0.970 0.332 -0.000 8.24e-05
Child mortality rate_lag1 0.0079 0.091 0.086 0.931 -0.171 0.187
Child mortality rate_lag2 -0.0023 0.049 -0.047 0.962 -0.099 0.094
Child mortality rate_lag3 0.1086 0.070 1.561 0.119 -0.028 0.245
Unemployment Rate_lag1 -0.0152 0.020 -0.765 0.444 -0.054 0.024
Unemployment Rate_lag2 -0.0034 0.019 -0.181 0.856 -0.041 0.034
Unemployment Rate_lag3 0.0601 0.028 2.160 0.031 0.006 0.115
Incomplete tertiary education_lag1 0.0288 0.026 1.104 0.270 -0.022 0.080
Incomplete tertiary education_lag2 0.0076 0.027 0.284 0.777 -0.045 0.060
Incomplete tertiary education_lag3 -0.3460 0.064 -5.442 0.000 -0.471 -0.221
Gini coefficient_lag1 -0.5535 0.119 -4.636 0.000 -0.787 -0.319
Gini coefficient_lag2 -0.5532 0.119 -4.637 0.000 -0.787 -0.319
Gini coefficient_lag3 -0.5404 0.119 -4.556 0.000 -0.773 -0.308
Sex ratio_lag1 0.0382 0.039 0.987 0.323 -0.038 0.114
Sex ratio_lag2 -0.0076 0.054 -0.141 0.888 -0.114 0.099
Sex ratio_lag3 0.3177 0.058 5.473 0.000 0.204 0.431
GDP_lag1 1.583e-15 3.66e-15 0.432 0.666 -5.6e-15 8.76e-15
GDP_lag2 3.884e-15 4.06e-15 0.956 0.339 -4.08e-15 1.18e-14
GDP_lag3 -1.433e-15 6.55e-15 -0.219 0.827 -1.43e-14 1.14e-14
Median age_lag1 -3.2667 0.706 -4.626 0.000 -4.651 -1.882
Median age_lag2 2.3629 0.652 3.622 0.000 1.084 3.642
Median age_lag3 -1.5873 0.343 -4.631 0.000 -2.259 -0.915
CPI_lag1 -9.809e-06 1.88e-05 -0.523 0.601 -4.66e-05 2.7e-05
CPI_lag2 -3.535e-06 2.33e-05 -0.152 0.879 -4.92e-05 4.22e-05
CPI_lag3 8.609e-06 1.76e-05 0.488 0.625 -2.59e-05 4.32e-05
BMI_avg_lag1 0.0201 0.024 0.827 0.408 -0.028 0.068
BMI_avg_lag2 -0.0280 0.026 -1.085 0.278 -0.079 0.023
BMI_avg_lag3 0.0645 0.040 1.625 0.104 -0.013 0.142
==============================================================================
Omnibus: 10607.007 Durbin-Watson: 0.111
Prob(Omnibus): 0.000 Jarque-Bera (JB): 271419.904
Skew: -2.858 Prob(JB): 0.00
Kurtosis: 22.480 Cond. No. 1.02e+16
==============================================================================
Notes:
[1] Standard Errors are heteroscedasticity and autocorrelation robust (HAC) using 5 lags and without small sample correction
[2] The condition number is large, 1.02e+16. This might indicate that there are
strong multicollinearity or other numerical problems.
=== Newey-West Adjusted OLS Results for: Cardiovascular diseases ===
OLS Regression Results
===================================================================================
Dep. Variable: Cardiovascular diseases R-squared: 0.237
Model: OLS Adj. R-squared: 0.236
Method: Least Squares F-statistic: 5.258
Date: Sat, 12 Jul 2025 Prob (F-statistic): 1.18e-23
Time: 12:55:45 Log-Likelihood: -1.0059e+05
No. Observations: 15806 AIC: 2.012e+05
Df Residuals: 15769 BIC: 2.015e+05
Df Model: 36
Covariance Type: HAC
======================================================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------------------------------
const -0.1567 0.025 -6.353 0.000 -0.205 -0.108
Cost of a healthy diet 1.6941 0.763 2.220 0.026 0.198 3.190
Income -0.0529 0.384 -0.138 0.890 -0.805 0.699
Inflation -0.0028 0.001 -1.954 0.051 -0.006 9.18e-06
Child mortality rate -2.5370 1.530 -1.658 0.097 -5.536 0.462
Unemployment Rate 0.6162 0.551 1.118 0.264 -0.464 1.697
Incomplete tertiary education 0.9247 2.194 0.422 0.673 -3.375 5.224
Gini coefficient -31.0282 4.594 -6.754 0.000 -40.033 -22.023
Sex ratio 0.5785 0.474 1.220 0.222 -0.351 1.508
GDP 4.48e-12 1.1e-12 4.067 0.000 2.32e-12 6.64e-12
Median age -8.6373 6.934 -1.246 0.213 -22.229 4.955
CPI 0.0008 0.001 1.324 0.185 -0.000 0.002
BMI_avg -0.9365 1.654 -0.566 0.571 -4.179 2.306
Cost of a healthy diet_lag1 1.7184 0.766 2.242 0.025 0.216 3.221
Cost of a healthy diet_lag2 1.7210 0.766 2.245 0.025 0.219 3.223
Cost of a healthy diet_lag3 1.7192 0.767 2.243 0.025 0.217 3.222
Income_lag1 -0.0144 0.219 -0.066 0.948 -0.444 0.416
Income_lag2 0.2196 0.312 0.704 0.481 -0.392 0.831
Income_lag3 -0.4661 0.373 -1.250 0.211 -1.197 0.265
Inflation_lag1 -0.0024 0.001 -2.252 0.024 -0.004 -0.000
Inflation_lag2 -0.0024 0.001 -2.391 0.017 -0.004 -0.000
Inflation_lag3 -0.0026 0.001 -2.373 0.018 -0.005 -0.000
Child mortality rate_lag1 0.0442 0.684 0.065 0.949 -1.297 1.385
Child mortality rate_lag2 0.1855 0.725 0.256 0.798 -1.236 1.607
Child mortality rate_lag3 2.4714 1.466 1.685 0.092 -0.403 5.345
Unemployment Rate_lag1 -0.1863 0.319 -0.585 0.559 -0.811 0.438
Unemployment Rate_lag2 -0.2320 0.324 -0.716 0.474 -0.868 0.404
Unemployment Rate_lag3 -0.4076 0.454 -0.897 0.370 -1.298 0.483
Incomplete tertiary education_lag1 0.4316 1.392 0.310 0.756 -2.296 3.160
Incomplete tertiary education_lag2 0.5947 1.396 0.426 0.670 -2.142 3.332
Incomplete tertiary education_lag3 -1.7024 2.345 -0.726 0.468 -6.298 2.893
Gini coefficient_lag1 -31.0810 4.601 -6.755 0.000 -40.100 -22.062
Gini coefficient_lag2 -31.0686 4.598 -6.757 0.000 -40.081 -22.057
Gini coefficient_lag3 -30.9924 4.583 -6.762 0.000 -39.976 -22.009
Sex ratio_lag1 0.1815 0.265 0.684 0.494 -0.339 0.702
Sex ratio_lag2 -0.1909 0.335 -0.570 0.569 -0.848 0.466
Sex ratio_lag3 0.6822 0.488 1.398 0.162 -0.274 1.639
GDP_lag1 -1.255e-15 2.69e-13 -0.005 0.996 -5.29e-13 5.26e-13
GDP_lag2 -7.842e-14 2.71e-13 -0.289 0.772 -6.09e-13 4.53e-13
GDP_lag3 4.221e-13 6.06e-13 0.697 0.486 -7.66e-13 1.61e-12
Median age_lag1 10.4764 10.144 1.033 0.302 -9.407 30.359
Median age_lag2 -9.4533 10.180 -0.929 0.353 -29.408 10.501
Median age_lag3 9.5787 7.243 1.323 0.186 -4.618 23.776
CPI_lag1 -3.376e-05 0.000 -0.101 0.920 -0.001 0.001
CPI_lag2 0.0001 0.000 0.390 0.696 -0.001 0.001
CPI_lag3 -0.0007 0.001 -1.166 0.244 -0.002 0.000
BMI_avg_lag1 -0.0266 0.930 -0.029 0.977 -1.850 1.797
BMI_avg_lag2 0.0690 0.939 0.074 0.941 -1.771 1.909
BMI_avg_lag3 -4.4425 1.645 -2.701 0.007 -7.666 -1.219
==============================================================================
Omnibus: 15991.150 Durbin-Watson: 0.033
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1337731.938
Skew: 4.903 Prob(JB): 0.00
Kurtosis: 46.989 Cond. No. 1.02e+16
==============================================================================
Notes:
[1] Standard Errors are heteroscedasticity and autocorrelation robust (HAC) using 5 lags and without small sample correction
[2] The condition number is large, 1.02e+16. This might indicate that there are
strong multicollinearity or other numerical problems.
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 48, but rank is 38
warnings.warn('covariance of constraints does not have full '
=== Newey-West Adjusted OLS Results for: Diabetes ===
OLS Regression Results
==============================================================================
Dep. Variable: Diabetes R-squared: 0.485
Model: OLS Adj. R-squared: 0.484
Method: Least Squares F-statistic: 627.9
Date: Sat, 12 Jul 2025 Prob (F-statistic): 0.00
Time: 12:55:45 Log-Likelihood: -41732.
No. Observations: 15806 AIC: 8.354e+04
Df Residuals: 15769 BIC: 8.382e+04
Df Model: 36
Covariance Type: HAC
======================================================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------------------------------
const -0.0009 0.001 -1.400 0.161 -0.002 0.000
Cost of a healthy diet 0.0202 0.021 0.979 0.327 -0.020 0.061
Income -0.0430 0.008 -5.651 0.000 -0.058 -0.028
Inflation -9.333e-05 9.63e-05 -0.970 0.332 -0.000 9.53e-05
Child mortality rate -0.0142 0.032 -0.441 0.660 -0.077 0.049
Unemployment Rate -0.0131 0.020 -0.643 0.520 -0.053 0.027
Incomplete tertiary education -0.1486 0.058 -2.548 0.011 -0.263 -0.034
Gini coefficient -0.1782 0.113 -1.575 0.115 -0.400 0.044
Sex ratio -0.0617 0.033 -1.885 0.059 -0.126 0.002
GDP 3.143e-14 9.87e-15 3.184 0.001 1.21e-14 5.08e-14
Median age -0.2127 0.234 -0.910 0.363 -0.671 0.245
CPI -3.196e-05 2e-05 -1.600 0.110 -7.11e-05 7.2e-06
BMI_avg -0.0933 0.067 -1.393 0.164 -0.225 0.038
Cost of a healthy diet_lag1 0.0204 0.021 0.981 0.327 -0.020 0.061
Cost of a healthy diet_lag2 0.0204 0.021 0.982 0.326 -0.020 0.061
Cost of a healthy diet_lag3 0.0203 0.021 0.980 0.327 -0.020 0.061
Income_lag1 -0.0076 0.006 -1.254 0.210 -0.019 0.004
Income_lag2 -0.0007 0.004 -0.192 0.848 -0.008 0.007
Income_lag3 -0.0100 0.007 -1.471 0.141 -0.023 0.003
Inflation_lag1 -8.162e-05 8.04e-05 -1.016 0.310 -0.000 7.59e-05
Inflation_lag2 -8.514e-05 7.67e-05 -1.110 0.267 -0.000 6.52e-05
Inflation_lag3 -0.0001 9.83e-05 -1.096 0.273 -0.000 8.49e-05
Child mortality rate_lag1 -0.0171 0.020 -0.847 0.397 -0.057 0.023
Child mortality rate_lag2 -0.0141 0.017 -0.806 0.420 -0.048 0.020
Child mortality rate_lag3 0.0255 0.039 0.649 0.516 -0.051 0.102
Unemployment Rate_lag1 -0.0134 0.013 -1.027 0.304 -0.039 0.012
Unemployment Rate_lag2 -0.0175 0.013 -1.361 0.173 -0.043 0.008
Unemployment Rate_lag3 0.0184 0.020 0.910 0.363 -0.021 0.058
Incomplete tertiary education_lag1 0.0080 0.024 0.338 0.735 -0.039 0.055
Incomplete tertiary education_lag2 0.0083 0.024 0.346 0.729 -0.038 0.055
Incomplete tertiary education_lag3 0.1200 0.059 2.035 0.042 0.004 0.236
Gini coefficient_lag1 -0.1780 0.114 -1.568 0.117 -0.401 0.045
Gini coefficient_lag2 -0.1776 0.113 -1.565 0.118 -0.400 0.045
Gini coefficient_lag3 -0.1769 0.113 -1.567 0.117 -0.398 0.044
Sex ratio_lag1 -0.0108 0.019 -0.584 0.559 -0.047 0.026
Sex ratio_lag2 0.0030 0.026 0.117 0.907 -0.047 0.053
Sex ratio_lag3 -0.0788 0.029 -2.673 0.008 -0.137 -0.021
GDP_lag1 -5.664e-16 5.28e-15 -0.107 0.915 -1.09e-14 9.79e-15
GDP_lag2 -1.044e-15 5.2e-15 -0.201 0.841 -1.12e-14 9.15e-15
GDP_lag3 -1.837e-14 9.05e-15 -2.029 0.042 -3.61e-14 -6.28e-16
Median age_lag1 0.1974 0.342 0.577 0.564 -0.473 0.867
Median age_lag2 -0.0377 0.331 -0.114 0.909 -0.687 0.612
Median age_lag3 0.0248 0.220 0.113 0.910 -0.406 0.455
CPI_lag1 -8.336e-07 5.96e-06 -0.140 0.889 -1.25e-05 1.09e-05
CPI_lag2 -4.16e-07 6.6e-06 -0.063 0.950 -1.34e-05 1.25e-05
CPI_lag3 -7.278e-07 1.1e-05 -0.066 0.947 -2.23e-05 2.08e-05
BMI_avg_lag1 -0.0064 0.037 -0.171 0.864 -0.079 0.067
BMI_avg_lag2 0.0122 0.038 0.324 0.746 -0.061 0.086
BMI_avg_lag3 1.1481 0.069 16.636 0.000 1.013 1.283
==============================================================================
Omnibus: 3876.617 Durbin-Watson: 0.101
Prob(Omnibus): 0.000 Jarque-Bera (JB): 12299.107
Skew: 1.248 Prob(JB): 0.00
Kurtosis: 6.527 Cond. No. 1.02e+16
==============================================================================
Notes:
[1] Standard Errors are heteroscedasticity and autocorrelation robust (HAC) using 5 lags and without small sample correction
[2] The condition number is large, 1.02e+16. This might indicate that there are
strong multicollinearity or other numerical problems.
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:1894: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 48, but rank is 38
warnings.warn('covariance of constraints does not have full '
Result of HAC:
The OLS regression summary reveals varying levels of model performance across the three health outcomes. The model for Life Expectancy demonstrates very strong explanatory power, with an R-squared of 0.867, indicating that about 87% of the variability in life expectancy is captured by the predictors. The high F-statistic (25,410) and a near-zero p-value confirm that the overall model is statistically significant, supported further by a low AIC (90270) and BIC (90550) that signal good model fit.
In contrast, the model for Cardiovascular Diseases shows notably weaker performance. Its R-squared value is just 0.237, meaning only 24% of the variance is explained by the model. Although the F-statistic (5.258) is statistically significant, the very high AIC (201200) and BIC (201500) suggest limited predictive capacity and potential room for structural improvement or additional variables.
The Diabetes model stands in the middle, with a moderate R-squared of 0.485. This implies that nearly 49% of the variation in diabetes prevalence is explained, which is respectable for population-level health data. A strong F-statistic (627.9) and comparatively lower AIC (83540) and BIC (83820) values suggest better goodness-of-fit than the cardiovascular model, though it doesn't match the strength of the life expectancy regression.
Overall, these results indicate that the predictors are highly effective at modeling life expectancy, reasonably effective for diabetes, and much less so for cardiovascular diseases — prompting consideration for alternative modeling techniques or feature enhancement in that domain.
Stationary Check (ADF)¶
On the other hand, the ADF test is used to check whether a time series is stationary, meaning that its statistical properties such as mean and variance do not change over time. Many time series models, especially ARIMA, require stationary input data. The ADF test does this by testing for the presence of a unit root. If the test returns a p-value less than 0.05, it suggests that the data is stationary and does not have a unit root, which is a favorable condition for modeling. If the p-value is higher, it indicates non-stationarity, and you may need to transform the series (e.g., using differencing) before modeling.
In this project, residual diagnostics ensure that the regression assumptions are met, increasing the reliability and interpretability of the models. The ADF test guides your decision on whether time series transformations like differencing are needed before applying models such as ARIMA. Together, these steps strengthen your modeling pipeline by validating model assumptions and ensuring the forecasts are based on appropriate statistical foundations.
# ADF
from statsmodels.tsa.stattools import adfuller
def adf_stationarity_check(series):
    """Run the Augmented Dickey-Fuller test on ``series`` with NaNs removed.

    Returns a dict holding the test statistic, the p-value and the critical
    values, taken from positions 0, 1 and 4 of ``adfuller``'s result.
    """
    outcome = adfuller(series.dropna())
    positions = {"ADF Statistic": 0, "p-value": 1, "Critical Values": 4}
    return {label: outcome[pos] for label, pos in positions.items()}
target_cols = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']

# Predictors exclude all three targets (not only the current one), so the
# matrix is the same for every target and is built once.
X_train = df_lagged.drop(columns=target_cols)

for target in target_cols:
    # Define y for the current target
    y_train = df_lagged[target]

    print(f"\n=== Diagnostics for: {target} ===")
    # residual_diagnostics must already be defined by an earlier cell
    print(residual_diagnostics(X_train, y_train))
    # The ADF test is applied to the target series itself, not to residuals
    print(adf_stationarity_check(y_train))
=== Diagnostics for: Life expectancy ===
{'Breusch-Pagan (Heteroscedasticity)': {'Lagrange multiplier statistic': np.float64(764.4477255858551), 'p-value': np.float64(1.0232367203146819e-129), 'f-value': np.float64(22.26162116493993), 'f p-value': np.float64(4.0766328819801317e-141)}, 'Ljung-Box p-value (Autocorrelation, lag=10)': np.float64(0.0)}
{'ADF Statistic': np.float64(-17.614197057131815), 'p-value': np.float64(3.866478352494326e-30), 'Critical Values': {'1%': np.float64(-3.4307638422205895), '5%': np.float64(-2.8617229010419623), '10%': np.float64(-2.5668673536914586)}}
=== Diagnostics for: Cardiovascular diseases ===
{'Breusch-Pagan (Heteroscedasticity)': {'Lagrange multiplier statistic': np.float64(4099.901314129021), 'p-value': np.float64(0.0), 'f-value': np.float64(153.41325149630802), 'f p-value': np.float64(0.0)}, 'Ljung-Box p-value (Autocorrelation, lag=10)': np.float64(0.0)}
{'ADF Statistic': np.float64(-10.55352996297969), 'p-value': np.float64(8.061050911316133e-19), 'Critical Values': {'1%': np.float64(-3.4307640518480906), '5%': np.float64(-2.861722993682136), '10%': np.float64(-2.5668674030026484)}}
=== Diagnostics for: Diabetes ===
{'Breusch-Pagan (Heteroscedasticity)': {'Lagrange multiplier statistic': np.float64(1618.971210445285), 'p-value': np.float64(8.583932328752581e-308), 'f-value': np.float64(49.98610858671592), 'f p-value': np.float64(0.0)}, 'Ljung-Box p-value (Autocorrelation, lag=10)': np.float64(0.0)}
{'ADF Statistic': np.float64(-10.316528085268), 'p-value': np.float64(3.0869194917113782e-18), 'Critical Values': {'1%': np.float64(-3.4307645768471384), '5%': np.float64(-2.8617232256936633), '10%': np.float64(-2.566867526499447)}}
The residual diagnostics for the models predicting Life Expectancy, Cardiovascular Diseases, and Diabetes reveal consistent patterns of statistical irregularities paired with encouraging signs of stationarity. All three targets show strong signs of heteroscedasticity, confirmed by highly significant Breusch–Pagan test results (p-values essentially zero), indicating that residual variances are not constant — a violation of classical regression assumptions that could affect inference accuracy. Moreover, the Ljung–Box test yields p-values of 0.0 across the board at lag 10, pointing to significant autocorrelation in residuals, which suggests that errors are not independently distributed over time and may be influenced by underlying temporal dependencies.
On a more favorable note, the Augmented Dickey-Fuller (ADF) test, which was applied to each target series itself (not to the model residuals), strongly rejects the null hypothesis of a unit root, with test statistics well below the critical thresholds and extremely small p-values. This indicates that the target series are stationary, meaning they fluctuate around a stable mean and exhibit no drift — an essential property for time-series models and reliable forecasting.
Together, these diagnostics suggest that while the target series appear stationary, the models struggle with error consistency and independence. Addressing these issues may require enhancements such as robust standard errors, autoregressive residual modeling, or more dynamic forecasting approaches.
ACF and PACF plot¶
ACF (Autocorrelation Function) and PACF (Partial Autocorrelation Function) plots are visual tools used to analyze the correlation structure of time series data. They help identify patterns and dependencies between data points at different lags (time intervals) and are crucial for determining appropriate models for time series forecasting, particularly AR (Autoregressive) and MA (Moving Average) models.
# Draw the autocorrelation and partial-autocorrelation plots, one figure per target.
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# df_lagged is expected to hold the target columns as time-series data
# (it is built earlier in the notebook).
target_cols = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']

for target in target_cols:
    # Missing observations are dropped before the correlations are computed.
    series = df_lagged[target].dropna()

    # Two stacked panels sharing the lag axis: ACF on top, PACF below.
    fig, ax = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
    acf_axis, pacf_axis = ax
    fig.suptitle(f'ACF and PACF for {target}', fontsize=16)

    plot_acf(series, lags=40, ax=acf_axis,
             title=f'Autocorrelation (ACF) - {target}')
    acf_axis.set_ylabel('ACF')

    plot_pacf(series, lags=40, ax=pacf_axis, method='ywm',
              title=f'Partial Autocorrelation (PACF) - {target}')
    pacf_axis.set_ylabel('PACF')

    # Leave head-room at the top of the figure for the suptitle.
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()
Restore Country and Year to df_lagged
# Bring the index of df_lagged back as ordinary columns.
# NOTE(review): presumably the index levels are Country and Year — confirm
# against the cell that builds df_lagged.
df_lagged_with_country = df_lagged.reset_index(drop=False)
Rolling Forecast Validation (Walk-Forward)
Ten countries, chosen for diversity across income levels, have been selected for rolling forecast validation (walk-forward) as follows:
- United States - High-income
- Germany - High-income
- Japan - High-income
- Brazil - Upper-middle-income
- India - Lower-middle-income
- Indonesia - Lower-middle-income
- Nigeria - Low-income
- Kenya - Low-income
- Mexico - Upper-middle-income
- Bangladesh - Lower-middle-income
# Rolling Forecast - Walk Forward Validation - REVISED
#
# For every selected country and target this cell:
#   1. fits ARIMA(1, 1, 1), Prophet and a Random Forest on start_train..end_train,
#   2. scores each model (RMSE) on the hold-out years in real_eval_period,
#   3. produces forecasts over forecast_horizon,
#   4. collects the RMSEs in df_forecast_validation_summary and writes them to
#      forecast_summary.csv.
# Each model is fit once per country/target and scored on one fixed hold-out
# window; no refitting happens between hold-out years.
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# === Setup ===
selected_countries = [
    'United States', 'Germany', 'Japan', 'Brazil', 'India',
    'Indonesia', 'Nigeria', 'Kenya', 'Mexico', 'Bangladesh'
]
target_columns = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']

# Features used by the Random Forest, per target.
selected_features_dict = {
    'Life expectancy': [
        'Child mortality rate', 'Median age', 'Incomplete tertiary education_lag3',
        'GDP', 'Sex ratio', 'Gini coefficient_lag2', 'Gini coefficient_lag3',
        'Sex ratio_lag2', 'Sex ratio_lag3', 'Sex ratio_lag1',
        'Gini coefficient', 'Incomplete tertiary education', 'Inflation'
    ],
    'Cardiovascular diseases': [
        'GDP', 'Gini coefficient_lag3', 'BMI_avg_lag3', 'Median age_lag3'
    ],
    'Diabetes': [
        'BMI_avg_lag3', 'Income', 'Income_lag1', 'Median age',
        'Income_lag3', 'Incomplete tertiary education', 'Income_lag2',
        'Child mortality rate', 'Sex ratio', 'Gini coefficient'
    ]
}

start_train = 1950
end_train = 2020
real_eval_period = [2021, 2022, 2023]
# Forecast horizon starts from 2024 and goes till 2073
forecast_horizon = list(range(2024, 2074))

# === Create future rows for years 2024 to 2073
future_rows = []
for country in df_combined_with_country['Country'].unique():
    for year in forecast_horizon:
        future_rows.append({'Country': country, 'Year': year})
df_future = pd.DataFrame(future_rows)

df_forecast_ready = pd.concat([df_combined_with_country, df_future], ignore_index=True)
df_forecast_ready['Year'] = df_forecast_ready['Year'].astype(int)

# === Impute missing values across all countries and years
# NOTE(review): ffill/bfill is applied to every column, targets included, so
# a hold-out year whose target is missing is filled with the last known value
# (which would make hold-out errors artificially small), and future rows
# carry the last observed feature values forward. Confirm this is intended.
df_forecast_ready = (
    df_forecast_ready
    .sort_values(['Country', 'Year'])
    .groupby('Country', group_keys=False)
    .apply(lambda x: x.ffill().bfill().infer_objects(copy=False))
    .reset_index(drop=True)
)

# === Initialize summary table
predictions_summary = []

# === Forecast Loop ===
for country in selected_countries:
    df_country = df_forecast_ready[df_forecast_ready['Country'] == country].sort_values('Year')

    for target in target_columns:
        print(f"\n {country} — {target}")
        if target not in df_country.columns:
            print(" Target missing")
            continue

        features = selected_features_dict.get(target, [])
        available_features = [f for f in features if f in df_country.columns]
        if not available_features:
            print(" No usable features found")
            continue

        df_train = df_country[df_country['Year'].between(start_train, end_train)]
        df_eval_real = df_country[df_country['Year'].isin(real_eval_period)]

        # === ARIMA ===
        arima_rmse = None
        try:
            df_train_arima = df_train[[target]].copy()
            # Synthetic yearly index; assumes the training rows are one per
            # year starting at start_train.
            df_train_arima.index = pd.date_range(start=f'{start_train}', periods=len(df_train_arima), freq='YE')
            model_arima = ARIMA(df_train_arima, order=(1, 1, 1)).fit()

            n_train = len(df_train_arima)
            # Real evaluation: the steps immediately after the training sample.
            pred_real = model_arima.predict(start=n_train, end=n_train + len(df_eval_real) - 1)
            actual_real = df_eval_real[target].values
            arima_rmse = np.sqrt(mean_squared_error(actual_real, pred_real))

            # Forecast for 2024-2073: skip the years between end_train and the
            # first forecast year so the steps line up with forecast_horizon
            # instead of starting at the first hold-out year.
            horizon_offset = forecast_horizon[0] - end_train - 1
            arima_forecast = model_arima.predict(
                start=n_train + horizon_offset,
                end=n_train + horizon_offset + len(forecast_horizon) - 1,
            )
            print(f"📉 ARIMA RMSE: {arima_rmse:.2f}")
        except Exception as e:
            print(f" ARIMA error: {e}")

        # === Prophet ===
        prophet_rmse = None
        try:
            prophet_df = df_train[['Year', target]].rename(columns={'Year': 'ds', target: 'y'})
            prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%Y')
            model_prophet = Prophet()
            model_prophet.fit(prophet_df)

            # Predict hold-out and forecast years in a single call, then split by year.
            future_years = real_eval_period + forecast_horizon
            future_dates = pd.DataFrame({'ds': pd.to_datetime(future_years, format='%Y')})
            forecast_prophet = model_prophet.predict(future_dates)

            # Real evaluation
            pred_real = forecast_prophet[forecast_prophet['ds'].dt.year.isin(real_eval_period)]['yhat'].values
            actual_real = df_eval_real[target].values
            prophet_rmse = np.sqrt(mean_squared_error(actual_real, pred_real))

            # Forecast for 2024-2073
            prophet_forecast = forecast_prophet[forecast_prophet['ds'].dt.year.isin(forecast_horizon)]
            print(f" Prophet RMSE: {prophet_rmse:.2f}")
        except Exception as e:
            print(f" Prophet error: {e}")

        # === Random Forest ===
        # Placeholder has one slot per forecast year for this country.
        rf_rmse, rf_forecast = None, [None] * len(forecast_horizon)
        try:
            X = df_country[available_features]
            y = df_country[target]
            train_mask = df_country['Year'].between(start_train, end_train)
            eval_mask = df_country['Year'].isin(real_eval_period)
            X_train, y_train = X[train_mask], y[train_mask]
            X_eval, y_eval = X[eval_mask], y[eval_mask]

            model = RandomForestRegressor(n_estimators=100, random_state=42)
            model.fit(X_train, y_train)
            pred_eval = model.predict(X_eval)
            rf_rmse = np.sqrt(mean_squared_error(y_eval, pred_eval))
            print(f" RF RMSE: {rf_rmse:.2f}")

            # Only forecast when every future row has a complete feature vector.
            X_forecast = X[df_country['Year'].isin(forecast_horizon)]
            if not X_forecast.isnull().any(axis=1).any():
                rf_forecast = model.predict(X_forecast).tolist()
        except Exception as e:
            # Report the failure like the other models instead of hiding it.
            print(f" RF error: {e}")

        # === Append to summary ===
        predictions_summary.append({
            "Country": country,
            "Target": target,
            "ARIMA_RMSE": round(arima_rmse, 4) if arima_rmse is not None else None,
            "Prophet_RMSE": round(prophet_rmse, 4) if prophet_rmse is not None else None,
            "RF_RMSE": round(rf_rmse, 4) if rf_rmse is not None else None
        })

# === Final Summary Table ===
# Passing columns= fixes the column order and still yields a valid (empty)
# table when no country/target pair produced a row.
df_forecast_validation_summary = pd.DataFrame(
    predictions_summary,
    columns=["Country", "Target", "ARIMA_RMSE", "Prophet_RMSE", "RF_RMSE"],
)
print("\n 📋 Rolling Forecast Validation Summary:")
print(df_forecast_validation_summary)

# Export summary
df_forecast_validation_summary.to_csv("forecast_summary.csv", index=False)
United States — Life expectancy 📉 ARIMA RMSE: 2.00
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/k6b9e8jj.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/kvfbixr1.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=55728', 'data', 'file=/tmp/tmpfus6j9tk/k6b9e8jj.json', 'init=/tmp/tmpfus6j9tk/kvfbixr1.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelhgva2299/prophet_model-20250714225355.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:53:55 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:53:56 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 1.56 United States — Cardiovascular diseases
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/i17t4f38.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fqaoi3qn.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=48554', 'data', 'file=/tmp/tmpfus6j9tk/i17t4f38.json', 'init=/tmp/tmpfus6j9tk/fqaoi3qn.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelry9gguxd/prophet_model-20250714225356.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:53:56 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 1.19
22:53:57 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 11.97
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
United States — Diabetes 📉 ARIMA RMSE: 0.01
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/r2f8exp3.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fd0_y7sd.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=46691', 'data', 'file=/tmp/tmpfus6j9tk/r2f8exp3.json', 'init=/tmp/tmpfus6j9tk/fd0_y7sd.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelqaflbqpw/prophet_model-20250714225358.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:53:58 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:53:58 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.49 Germany — Life expectancy
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/gzjdst4y.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/m2uk4kbf.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=34654', 'data', 'file=/tmp/tmpfus6j9tk/gzjdst4y.json', 'init=/tmp/tmpfus6j9tk/m2uk4kbf.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model8_yx77yj/prophet_model-20250714225359.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:53:59 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 0.47
22:53:59 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
Prophet RMSE: 0.61 Germany — Cardiovascular diseases 📉 ARIMA RMSE: 0.43
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/27907kb5.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/re87vcrw.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=60505', 'data', 'file=/tmp/tmpfus6j9tk/27907kb5.json', 'init=/tmp/tmpfus6j9tk/re87vcrw.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelj6igl2bw/prophet_model-20250714225359.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:53:59 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:00 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 2.13 Germany — Diabetes
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/vqiwtfk9.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/930xzkv2.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=34706', 'data', 'file=/tmp/tmpfus6j9tk/vqiwtfk9.json', 'init=/tmp/tmpfus6j9tk/930xzkv2.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelo5u3z6qj/prophet_model-20250714225400.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:00 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 0.00
22:54:01 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 2.76 Japan — Life expectancy
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ulyt_y2i.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/4s97bast.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=19551', 'data', 'file=/tmp/tmpfus6j9tk/ulyt_y2i.json', 'init=/tmp/tmpfus6j9tk/4s97bast.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelv2a6ecw8/prophet_model-20250714225403.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:03 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 0.64
22:54:04 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.58 Japan — Cardiovascular diseases
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/y45dgblp.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/itogmayo.json DEBUG:cmdstanpy:idx 0
📉 ARIMA RMSE: 1.55
DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=1113', 'data', 'file=/tmp/tmpfus6j9tk/y45dgblp.json', 'init=/tmp/tmpfus6j9tk/itogmayo.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model12mu1jf3/prophet_model-20250714225407.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:07 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:08 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 7.69 Japan — Diabetes 📉 ARIMA RMSE: 0.00
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0d368l0n.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/y75k3qqm.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=6545', 'data', 'file=/tmp/tmpfus6j9tk/0d368l0n.json', 'init=/tmp/tmpfus6j9tk/y75k3qqm.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelyl_gyffm/prophet_model-20250714225409.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:09 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:09 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 1.84
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Brazil — Life expectancy 📉 ARIMA RMSE: 3.01
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/k8b6yy08.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/pzab_u6a.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=34648', 'data', 'file=/tmp/tmpfus6j9tk/k8b6yy08.json', 'init=/tmp/tmpfus6j9tk/pzab_u6a.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model53e36sre/prophet_model-20250714225410.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:10 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:11 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 2.19 Brazil — Cardiovascular diseases 📉 ARIMA RMSE: 1.82
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fn0_vj3j.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/shdg9zrz.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=26710', 'data', 'file=/tmp/tmpfus6j9tk/fn0_vj3j.json', 'init=/tmp/tmpfus6j9tk/shdg9zrz.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeltcwukffq/prophet_model-20250714225412.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:12 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:13 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 6.55 Brazil — Diabetes 📉 ARIMA RMSE: 0.00
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/o7zp6vgc.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/z2ay57oc.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=62887', 'data', 'file=/tmp/tmpfus6j9tk/o7zp6vgc.json', 'init=/tmp/tmpfus6j9tk/z2ay57oc.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model043owfby/prophet_model-20250714225414.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:14 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:15 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.19 India — Life expectancy
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/e999rhz9.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/idwlrkkb.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=32049', 'data', 'file=/tmp/tmpfus6j9tk/e999rhz9.json', 'init=/tmp/tmpfus6j9tk/idwlrkkb.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelj8vex4bv/prophet_model-20250714225417.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:17 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 1.97
22:54:17 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 2.48 India — Cardiovascular diseases
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/byhiuax_.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/k9zwvld8.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=50154', 'data', 'file=/tmp/tmpfus6j9tk/byhiuax_.json', 'init=/tmp/tmpfus6j9tk/k9zwvld8.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelzm27i82p/prophet_model-20250714225420.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:20 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 19.66
22:54:21 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 37.42 India — Diabetes
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/6jg2tyad.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/cwi14jpe.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=60566', 'data', 'file=/tmp/tmpfus6j9tk/6jg2tyad.json', 'init=/tmp/tmpfus6j9tk/cwi14jpe.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelbi6bopvu/prophet_model-20250714225423.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:23 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 0.02
22:54:23 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.83 Indonesia — Life expectancy
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/79v76ucn.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/v0uhgrz9.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=30038', 'data', 'file=/tmp/tmpfus6j9tk/79v76ucn.json', 'init=/tmp/tmpfus6j9tk/v0uhgrz9.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelevcd7dtt/prophet_model-20250714225424.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:24 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 1.89
22:54:25 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 1.69 Indonesia — Cardiovascular diseases
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5k14f9y3.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/z81txmo0.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=25943', 'data', 'file=/tmp/tmpfus6j9tk/5k14f9y3.json', 'init=/tmp/tmpfus6j9tk/z81txmo0.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model50ahb9y2/prophet_model-20250714225426.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:26 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 8.49
22:54:27 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 8.00 Indonesia — Diabetes
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ppo_f3v2.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/df5l74r_.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=44234', 'data', 'file=/tmp/tmpfus6j9tk/ppo_f3v2.json', 'init=/tmp/tmpfus6j9tk/df5l74r_.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelog4iax39/prophet_model-20250714225428.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:28 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 0.00
22:54:29 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.71
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/o1cdvkf4.json
Nigeria — Life expectancy 📉 ARIMA RMSE: 0.70
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/1j67ita3.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=37848', 'data', 'file=/tmp/tmpfus6j9tk/o1cdvkf4.json', 'init=/tmp/tmpfus6j9tk/1j67ita3.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0e_2aamb/prophet_model-20250714225429.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:29 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:30 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.37 Nigeria — Cardiovascular diseases
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/dgkyn6cr.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ca8_euyf.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=72798', 'data', 'file=/tmp/tmpfus6j9tk/dgkyn6cr.json', 'init=/tmp/tmpfus6j9tk/ca8_euyf.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model8sk75ftp/prophet_model-20250714225432.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:32 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 0.72
22:54:34 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 4.50 Nigeria — Diabetes 📉 ARIMA RMSE: 0.00
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/hiuzsx1j.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_l_84v__.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=28510', 'data', 'file=/tmp/tmpfus6j9tk/hiuzsx1j.json', 'init=/tmp/tmpfus6j9tk/_l_84v__.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelx1t0vaq6/prophet_model-20250714225435.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:35 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:35 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.14 Kenya — Life expectancy 📉 ARIMA RMSE: 3.24
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/d15fg_fu.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/6tql93an.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=95483', 'data', 'file=/tmp/tmpfus6j9tk/d15fg_fu.json', 'init=/tmp/tmpfus6j9tk/6tql93an.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelpb3n2ksg/prophet_model-20250714225436.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:36 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:37 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 1.67 Kenya — Cardiovascular diseases
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/q_tmx6t1.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/dofu1xxq.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=16231', 'data', 'file=/tmp/tmpfus6j9tk/q_tmx6t1.json', 'init=/tmp/tmpfus6j9tk/dofu1xxq.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeltorom1bl/prophet_model-20250714225438.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:38 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 0.12
22:54:39 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.93
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/407tce1y.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/f8xq1x3f.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=29379', 'data', 'file=/tmp/tmpfus6j9tk/407tce1y.json', 'init=/tmp/tmpfus6j9tk/f8xq1x3f.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model_vikl_00/prophet_model-20250714225440.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:40 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
Kenya — Diabetes 📉 ARIMA RMSE: 0.00
22:54:40 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 3.48 Mexico — Life expectancy
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/hxfjmmt7.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/44rapkqa.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=94006', 'data', 'file=/tmp/tmpfus6j9tk/hxfjmmt7.json', 'init=/tmp/tmpfus6j9tk/44rapkqa.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modely1pppj68/prophet_model-20250714225441.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:41 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 6.22
22:54:42 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 2.43
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/n6fts8wi.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/z4_8o7vj.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=50073', 'data', 'file=/tmp/tmpfus6j9tk/n6fts8wi.json', 'init=/tmp/tmpfus6j9tk/z4_8o7vj.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modela2mbrnn4/prophet_model-20250714225443.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:43 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
Mexico — Cardiovascular diseases 📉 ARIMA RMSE: 0.58
22:54:44 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.84 Mexico — Diabetes
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/sprmipcw.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/x3s3qw6w.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=93454', 'data', 'file=/tmp/tmpfus6j9tk/sprmipcw.json', 'init=/tmp/tmpfus6j9tk/x3s3qw6w.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelytxrm04e/prophet_model-20250714225445.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:45 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 0.00
22:54:45 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 0.80 Bangladesh — Life expectancy
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/b2wyqpv2.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/4jeuzhlm.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=68825', 'data', 'file=/tmp/tmpfus6j9tk/b2wyqpv2.json', 'init=/tmp/tmpfus6j9tk/4jeuzhlm.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelplo4ln2s/prophet_model-20250714225446.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:46 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 2.31
22:54:47 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 1.68 Bangladesh — Cardiovascular diseases
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/yfflpmv6.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fflmvfwg.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=41266', 'data', 'file=/tmp/tmpfus6j9tk/yfflpmv6.json', 'init=/tmp/tmpfus6j9tk/fflmvfwg.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model_81ybuu7/prophet_model-20250714225448.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:48 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
📉 ARIMA RMSE: 1.18
22:54:48 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Prophet RMSE: 6.99 Bangladesh — Diabetes 📉 ARIMA RMSE: 0.00
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/965zsf0t.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/al5iwv1y.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=22801', 'data', 'file=/tmp/tmpfus6j9tk/965zsf0t.json', 'init=/tmp/tmpfus6j9tk/al5iwv1y.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelmxzwl0qy/prophet_model-20250714225448.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:54:48 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:54:49 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet RMSE: 2.99
📋 Rolling Forecast Validation Summary:
Country Target ARIMA_RMSE Prophet_RMSE RF_RMSE
0 United States Life expectancy 1.9969 1.5614 1.1020
1 United States Cardiovascular diseases 1.1904 11.9749 4.3086
2 United States Diabetes 0.0080 0.4896 0.0081
3 Germany Life expectancy 0.4746 0.6124 0.3493
4 Germany Cardiovascular diseases 0.4339 2.1255 0.8296
5 Germany Diabetes 0.0000 2.7582 0.0000
6 Japan Life expectancy 0.6387 0.5765 0.2755
7 Japan Cardiovascular diseases 1.5477 7.6884 2.5709
8 Japan Diabetes 0.0000 1.8411 0.0000
9 Brazil Life expectancy 3.0096 2.1896 1.1742
10 Brazil Cardiovascular diseases 1.8195 6.5472 2.3205
11 Brazil Diabetes 0.0000 0.1860 0.0327
12 India Life expectancy 1.9737 2.4758 2.7723
13 India Cardiovascular diseases 19.6630 37.4210 14.6042
14 India Diabetes 0.0197 0.8306 0.0350
15 Indonesia Life expectancy 1.8872 1.6929 1.7905
16 Indonesia Cardiovascular diseases 8.4866 7.9981 7.3668
17 Indonesia Diabetes 0.0000 0.7121 0.0090
18 Nigeria Life expectancy 0.7003 0.3693 6.4246
19 Nigeria Cardiovascular diseases 0.7164 4.4984 1.7240
20 Nigeria Diabetes 0.0000 0.1408 0.0040
21 Kenya Life expectancy 3.2353 1.6706 0.8421
22 Kenya Cardiovascular diseases 0.1218 0.9335 0.3225
23 Kenya Diabetes 0.0004 3.4797 0.0070
24 Mexico Life expectancy 6.2245 2.4286 2.5185
25 Mexico Cardiovascular diseases 0.5788 0.8437 2.4137
26 Mexico Diabetes 0.0000 0.7997 0.0272
27 Bangladesh Life expectancy 2.3127 1.6767 3.5922
28 Bangladesh Cardiovascular diseases 1.1756 6.9912 0.4101
29 Bangladesh Diabetes 0.0000 2.9878 0.1033
Final Model Training & Forecasting | Evaluation metrics (RMSE, MAPE, R²)
# Step 19 Final Model Training & Forecasting - ok
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
import warnings
import logging
# Keep the cell output readable: hide Python warnings and quieten the
# libraries that log on every fit. Prophet and cmdstanpy emit INFO/DEBUG
# records for each model, so they are limited to WARNING and above.
warnings.filterwarnings("ignore")
logging.getLogger('statsmodels').setLevel(logging.ERROR)
for _noisy_logger in ('cmdstanpy', 'prophet'):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

# === Time Ranges
start_train = 1950                          # first training year (inclusive)
end_train = 2020                            # last training year (inclusive)
eval_years = [2021, 2022, 2023]             # hold-out years used for RMSE
forecast_horizon = list(range(2024, 2075))  # 2024..2074 inclusive

# === Input Variables
selected_countries = [
    'United States', 'Germany', 'Japan', 'Brazil', 'India',
    'Indonesia', 'Nigeria', 'Kenya', 'Mexico', 'Bangladesh',
]
target_columns = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']

# Feature columns offered to the Random Forest for each target. Columns that
# are absent from the dataset are filtered out where this mapping is used.
selected_features_dict = {
    'Life expectancy': [
        'Child mortality rate', 'Median age', 'Incomplete tertiary education_lag3',
        'GDP', 'Sex ratio', 'Gini coefficient_lag2', 'Gini coefficient_lag3',
        'Sex ratio_lag2', 'Sex ratio_lag3', 'Sex ratio_lag1',
        'Gini coefficient', 'Incomplete tertiary education', 'Inflation',
    ],
    'Cardiovascular diseases': [
        'GDP', 'Gini coefficient_lag3', 'BMI_avg_lag3', 'Median age_lag3',
    ],
    'Diabetes': [
        'BMI_avg_lag3', 'Income', 'Income_lag1', 'Median age',
        'Income_lag3', 'Incomplete tertiary education', 'Income_lag2',
        'Child mortality rate', 'Sex ratio', 'Gini coefficient',
    ],
}
# === Ready Dataset (already loaded)
# `df_forecast_ready` is not created in this cell; it must already exist and
# provide 'Country', 'Year', the target columns and the feature columns.

# === Forecasting and Evaluation


def _report_model_failure(model_name, country, target, exc):
    """Print which model was skipped for a country/target pair and why."""
    print(f"[skipped] {model_name} | {country} | {target}: "
          f"{type(exc).__name__}: {exc}")


forecast_summary = []
for country in selected_countries:
    df_country = df_forecast_ready[df_forecast_ready['Country'] == country].sort_values('Year')

    for target in target_columns:
        if target not in df_country.columns:
            continue
        features = selected_features_dict.get(target, [])
        available_features = [f for f in features if f in df_country.columns]
        if not available_features:
            continue

        # Split once by year; every model below reuses these three frames.
        df_train = df_country[df_country['Year'].between(start_train, end_train)]
        df_eval = df_country[df_country['Year'].isin(eval_years)]
        df_forecast = df_country[df_country['Year'].isin(forecast_horizon)]
        actual_eval = df_eval[target].values
        n_train, n_eval, n_forecast = len(df_train), len(df_eval), len(df_forecast)

        #### ARIMA ####
        arima_rmse, arima_forecast = None, [None] * n_forecast
        try:
            train_series = df_train[[target]].copy()
            # A regular yearly index starting at the first training year.
            # Predictions below are requested by integer position, so the
            # labels only give statsmodels an index with a frequency.
            first_train_year = str(int(df_train['Year'].iloc[0]))
            train_series.index = pd.date_range(start=first_train_year, periods=n_train, freq='YE')
            arima_model = ARIMA(train_series, order=(1, 1, 1)).fit()
            # Positions n_train .. n_train + n_eval - 1 are the hold-out steps.
            arima_eval = arima_model.predict(start=n_train, end=n_train + n_eval - 1)
            arima_rmse = np.sqrt(mean_squared_error(actual_eval, arima_eval))
            # The forecast horizon continues right after the hold-out steps.
            arima_forecast = arima_model.predict(
                start=n_train + n_eval,
                end=n_train + n_eval + n_forecast - 1,
            ).tolist()
        except Exception as exc:  # best effort: one failed model must not stop the run
            _report_model_failure('ARIMA', country, target, exc)

        #### Prophet ####
        prophet_rmse, prophet_forecast = None, [None] * n_forecast
        try:
            prophet_df = df_train[['Year', target]].rename(columns={'Year': 'ds', target: 'y'})
            prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%Y')
            prophet_model = Prophet()
            prophet_model.fit(prophet_df)
            # Predict exactly the hold-out rows that exist so the predictions
            # line up one-to-one with actual_eval.
            eval_dates = pd.DataFrame({'ds': pd.to_datetime(df_eval['Year'], format='%Y')})
            forecast_eval = prophet_model.predict(eval_dates)
            prophet_rmse = np.sqrt(mean_squared_error(actual_eval, forecast_eval['yhat'].values))
            forecast_years = pd.DataFrame({'ds': pd.to_datetime(df_forecast['Year'], format='%Y')})
            prophet_forecast = prophet_model.predict(forecast_years)['yhat'].tolist()
        except Exception as exc:  # best effort, see ARIMA above
            _report_model_failure('Prophet', country, target, exc)

        #### Random Forest ####
        rf_rmse, rf_forecast = None, [None] * n_forecast
        try:
            X_train, y_train = df_train[available_features], df_train[target]
            X_eval, y_eval = df_eval[available_features], df_eval[target]
            rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
            rf_model.fit(X_train, y_train)
            rf_rmse = np.sqrt(mean_squared_error(y_eval, rf_model.predict(X_eval)))
            X_forecast = df_forecast[available_features]
            # Only forecast when every future feature value is present;
            # otherwise the forecast stays a list of None.
            if not X_forecast.isnull().any(axis=1).any():
                rf_forecast = rf_model.predict(X_forecast).tolist()
        except Exception as exc:  # best effort, see ARIMA above
            _report_model_failure('Random Forest', country, target, exc)

        # One output row per forecast year; the RMSE values are repeated on
        # every row of the same country/target pair.
        for i, year in enumerate(df_forecast['Year']):
            forecast_summary.append({
                "Country": country,
                "Target": target,
                "Year": year,
                "ARIMA_RMSE": arima_rmse,
                "ARIMA_Forecast": arima_forecast[i],
                "Prophet_RMSE": prophet_rmse,
                "Prophet_Forecast": prophet_forecast[i],
                "RF_RMSE": rf_rmse,
                "RF_Forecast": rf_forecast[i],
            })

# === Combine All Results
df_model_comparison = pd.DataFrame(forecast_summary)

# === Summary Table: Best Model by RMSE
# RMSE is constant within a country/target group, so one value per group is enough.
summary_table = (
    df_model_comparison
    .groupby(['Country', 'Target'])[['ARIMA_RMSE', 'Prophet_RMSE', 'RF_RMSE']]
    .first()
    .reset_index()
)
def best_model_picker(row):
    """Return the name of the model with the lowest RMSE in one summary row.

    Parameters
    ----------
    row : mapping-like (for example a pandas Series)
        Must provide the keys 'ARIMA_RMSE', 'Prophet_RMSE' and 'RF_RMSE'.

    Returns
    -------
    str or None
        'ARIMA', 'Prophet' or 'RF'. Ties are resolved in that order.
        None when every RMSE is missing (None/NaN), so that a model which
        never produced a score is not reported as the winner.
    """
    scores = {
        'ARIMA': row['ARIMA_RMSE'],
        'Prophet': row['Prophet_RMSE'],
        'RF': row['RF_RMSE'],
    }
    # Drop models without a usable score before comparing.
    valid_scores = {name: rmse for name, rmse in scores.items() if pd.notnull(rmse)}
    if not valid_scores:
        return None
    # min() keeps the first key on ties, and dict order is ARIMA, Prophet, RF.
    return min(valid_scores, key=valid_scores.get)
# Label every (Country, Target) row with the model chosen by best_model_picker.
summary_table['🎯 Best_Model'] = summary_table.apply(best_model_picker, axis=1)

# === Display Results
summary_columns = ['Country', 'Target', 'ARIMA_RMSE', 'Prophet_RMSE', 'RF_RMSE', '🎯 Best_Model']
print("\n📊 Summary of Best Models per Country and Target:\n")
print(summary_table[summary_columns].to_string(index=False))

# === Optional Preview of Forecasts
# Keep only a handful of milestone years, ordered by country, target and year.
sample_years = [2025, 2030, 2040, 2050, 2060, 2074]
in_sample_years = df_model_comparison['Year'].isin(sample_years)
df_sample = df_model_comparison.loc[in_sample_years]
df_sample = df_sample.sort_values(['Country', 'Target', 'Year'])
print("\n📋 Forecasts for Selected Years:\n")
# The preview is capped at the first 30 rows.
print(df_sample.head(30).to_string(index=False))
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/r4c9jzuz.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/s1f34b_s.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=65770', 'data', 'file=/tmp/tmpfus6j9tk/r4c9jzuz.json', 'init=/tmp/tmpfus6j9tk/s1f34b_s.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelh5gywcku/prophet_model-20250714225504.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:04 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:05 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/old7wzoj.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/6r1ltl6d.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=47231', 'data', 'file=/tmp/tmpfus6j9tk/old7wzoj.json', 'init=/tmp/tmpfus6j9tk/6r1ltl6d.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelc2f4yjvh/prophet_model-20250714225505.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:05 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:05 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. 
Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lpm3p70i.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/krkxr_oi.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=94398', 'data', 'file=/tmp/tmpfus6j9tk/lpm3p70i.json', 'init=/tmp/tmpfus6j9tk/krkxr_oi.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelfvdtcbbq/prophet_model-20250714225506.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:06 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:06 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/e3w6avfh.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/l9eu3ob9.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=31313', 'data', 'file=/tmp/tmpfus6j9tk/e3w6avfh.json', 'init=/tmp/tmpfus6j9tk/l9eu3ob9.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelsrplbx_6/prophet_model-20250714225507.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:07 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:08 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. 
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/j4bdwibr.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/kvv6xdlw.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=34024', 'data', 'file=/tmp/tmpfus6j9tk/j4bdwibr.json', 'init=/tmp/tmpfus6j9tk/kvv6xdlw.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeln_w30wm7/prophet_model-20250714225508.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:08 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:09 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/rctfbd9u.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/z7klz38v.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=84959', 'data', 'file=/tmp/tmpfus6j9tk/rctfbd9u.json', 'init=/tmp/tmpfus6j9tk/z7klz38v.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelkc9c98ro/prophet_model-20250714225509.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:09 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:10 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. 
Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/iusfcrmj.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/q7cd1nnr.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=26530', 'data', 'file=/tmp/tmpfus6j9tk/iusfcrmj.json', 'init=/tmp/tmpfus6j9tk/q7cd1nnr.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model4mc5_ko4/prophet_model-20250714225512.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:12 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:13 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/y8ptvc9y.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/u_cqdtc6.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=80555', 'data', 'file=/tmp/tmpfus6j9tk/y8ptvc9y.json', 'init=/tmp/tmpfus6j9tk/u_cqdtc6.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model1qoeuxpp/prophet_model-20250714225513.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:13 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:13 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_kvdsga4.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/a1liv55q.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=64616', 'data', 'file=/tmp/tmpfus6j9tk/_kvdsga4.json', 'init=/tmp/tmpfus6j9tk/a1liv55q.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelpd0hdnbl/prophet_model-20250714225514.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:14 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:14 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3pynzyoz.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0z0fzkd2.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=87503', 'data', 'file=/tmp/tmpfus6j9tk/3pynzyoz.json', 'init=/tmp/tmpfus6j9tk/0z0fzkd2.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model5jks46b0/prophet_model-20250714225515.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:15 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:15 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/tv5ya445.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ahw7l3wh.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23904', 'data', 'file=/tmp/tmpfus6j9tk/tv5ya445.json', 'init=/tmp/tmpfus6j9tk/ahw7l3wh.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model7830gp2s/prophet_model-20250714225516.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:16 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:17 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/wp7y9xj8.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/aa02w8wu.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=38349', 'data', 'file=/tmp/tmpfus6j9tk/wp7y9xj8.json', 'init=/tmp/tmpfus6j9tk/aa02w8wu.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelc5bizkqi/prophet_model-20250714225517.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:17 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:17 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/p6anzmjo.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/htv320yk.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=86147', 'data', 'file=/tmp/tmpfus6j9tk/p6anzmjo.json', 'init=/tmp/tmpfus6j9tk/htv320yk.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelpxrbbewf/prophet_model-20250714225518.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:18 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:18 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/odfqtqot.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ia2xm0r6.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=25798', 'data', 'file=/tmp/tmpfus6j9tk/odfqtqot.json', 'init=/tmp/tmpfus6j9tk/ia2xm0r6.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelnwij6wat/prophet_model-20250714225518.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:19 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:19 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/9pqv2gfy.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lsnmee2k.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=84672', 'data', 'file=/tmp/tmpfus6j9tk/9pqv2gfy.json', 'init=/tmp/tmpfus6j9tk/lsnmee2k.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model07f1ijwf/prophet_model-20250714225520.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:20 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:21 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/funy0u5n.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/swea6lr8.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=52299', 'data', 'file=/tmp/tmpfus6j9tk/funy0u5n.json', 'init=/tmp/tmpfus6j9tk/swea6lr8.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeloezaaddf/prophet_model-20250714225521.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:21 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:21 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/mo0nq8xs.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ietfwbqr.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=48295', 'data', 'file=/tmp/tmpfus6j9tk/mo0nq8xs.json', 'init=/tmp/tmpfus6j9tk/ietfwbqr.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelxqru09dy/prophet_model-20250714225522.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:22 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:23 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fkw26c6x.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/y5ak1rzk.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=2475', 'data', 'file=/tmp/tmpfus6j9tk/fkw26c6x.json', 'init=/tmp/tmpfus6j9tk/y5ak1rzk.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelwbjj7qel/prophet_model-20250714225524.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:24 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:25 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/blcakxhf.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5su8aobh.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=46827', 'data', 'file=/tmp/tmpfus6j9tk/blcakxhf.json', 'init=/tmp/tmpfus6j9tk/5su8aobh.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelg9htyrgb/prophet_model-20250714225525.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:25 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:26 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ycfa348k.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/q1rdp8e8.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=58341', 'data', 'file=/tmp/tmpfus6j9tk/ycfa348k.json', 'init=/tmp/tmpfus6j9tk/q1rdp8e8.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeludetfrwo/prophet_model-20250714225526.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:26 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:27 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/i1l6y5pc.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/26x13p4t.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=49594', 'data', 'file=/tmp/tmpfus6j9tk/i1l6y5pc.json', 'init=/tmp/tmpfus6j9tk/26x13p4t.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelj_70stpd/prophet_model-20250714225527.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:27 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:27 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_xnoncwy.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/v9re0ftc.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=98964', 'data', 'file=/tmp/tmpfus6j9tk/_xnoncwy.json', 'init=/tmp/tmpfus6j9tk/v9re0ftc.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelf8tktyz1/prophet_model-20250714225528.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:28 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:28 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/4i79fkup.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/hpqrm7u9.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=24598', 'data', 'file=/tmp/tmpfus6j9tk/4i79fkup.json', 'init=/tmp/tmpfus6j9tk/hpqrm7u9.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model6hmxaedm/prophet_model-20250714225529.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:29 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:30 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_58_svuj.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7_9l6eum.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=40898', 'data', 'file=/tmp/tmpfus6j9tk/_58_svuj.json', 'init=/tmp/tmpfus6j9tk/7_9l6eum.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelzum150hr/prophet_model-20250714225530.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:30 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:31 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/jxx1pr3h.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/l4z9n9bc.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=73837', 'data', 'file=/tmp/tmpfus6j9tk/jxx1pr3h.json', 'init=/tmp/tmpfus6j9tk/l4z9n9bc.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeloel0gqp_/prophet_model-20250714225531.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:31 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:32 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/rdvs6327.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/da9m30y7.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=49594', 'data', 'file=/tmp/tmpfus6j9tk/rdvs6327.json', 'init=/tmp/tmpfus6j9tk/da9m30y7.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelhay7ycq8/prophet_model-20250714225533.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:33 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:33 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/v82kdnyy.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/g88xq39s.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=51093', 'data', 'file=/tmp/tmpfus6j9tk/v82kdnyy.json', 'init=/tmp/tmpfus6j9tk/g88xq39s.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeli119xk41/prophet_model-20250714225534.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:34 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:34 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/h3h6yf6j.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0sjgmds5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=89214', 'data', 'file=/tmp/tmpfus6j9tk/h3h6yf6j.json', 'init=/tmp/tmpfus6j9tk/0sjgmds5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model896oi49i/prophet_model-20250714225534.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:34 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:34 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/es14idjr.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5sasg3ib.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=46551', 'data', 'file=/tmp/tmpfus6j9tk/es14idjr.json', 'init=/tmp/tmpfus6j9tk/5sasg3ib.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelr08aoi4z/prophet_model-20250714225537.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:37 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:37 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/glp01hwx.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3vfn1y_c.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=75436', 'data', 'file=/tmp/tmpfus6j9tk/glp01hwx.json', 'init=/tmp/tmpfus6j9tk/3vfn1y_c.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model3npox83j/prophet_model-20250714225538.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:55:38 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:55:39 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
📊 Summary of Best Models per Country and Target:
Country Target ARIMA_RMSE Prophet_RMSE RF_RMSE 🎯 Best_Model
Bangladesh Cardiovascular diseases 1.175582 6.991238 4.101364e-01 RF
Bangladesh Diabetes 0.000036 2.987844 1.033409e-01 ARIMA
Bangladesh Life expectancy 2.312728 1.676697 3.592230e+00 Prophet
Brazil Cardiovascular diseases 1.819507 6.547227 2.320450e+00 ARIMA
Brazil Diabetes 0.000000 0.186005 3.273123e-02 ARIMA
Brazil Life expectancy 3.009573 2.189554 1.174190e+00 RF
Germany Cardiovascular diseases 0.433925 2.125500 8.295614e-01 ARIMA
Germany Diabetes 0.000000 2.758175 0.000000e+00 ARIMA
Germany Life expectancy 0.474573 0.612408 3.493298e-01 RF
India Cardiovascular diseases 19.662985 37.420988 1.460423e+01 RF
India Diabetes 0.019744 0.830592 3.500000e-02 ARIMA
India Life expectancy 1.973657 2.475751 2.772311e+00 ARIMA
Indonesia Cardiovascular diseases 8.486563 7.998086 7.366761e+00 RF
Indonesia Diabetes 0.000000 0.712114 9.000000e-03 ARIMA
Indonesia Life expectancy 1.887179 1.692886 1.790459e+00 Prophet
Japan Cardiovascular diseases 1.547668 7.688441 2.570900e+00 ARIMA
Japan Diabetes 0.000000 1.841061 4.440892e-15 ARIMA
Japan Life expectancy 0.638746 0.576474 2.755038e-01 RF
Kenya Cardiovascular diseases 0.121752 0.933468 3.225326e-01 ARIMA
Kenya Diabetes 0.000379 3.479734 7.000000e-03 ARIMA
Kenya Life expectancy 3.235337 1.670562 8.420547e-01 RF
Mexico Cardiovascular diseases 0.578806 0.843693 2.413675e+00 ARIMA
Mexico Diabetes 0.000000 0.799705 2.718455e-02 ARIMA
Mexico Life expectancy 6.224500 2.428620 2.518455e+00 Prophet
Nigeria Cardiovascular diseases 0.716350 4.498448 1.724021e+00 ARIMA
Nigeria Diabetes 0.000000 0.140798 4.000000e-03 ARIMA
Nigeria Life expectancy 0.700330 0.369290 6.424644e+00 Prophet
United States Cardiovascular diseases 1.190369 11.974926 4.308573e+00 ARIMA
United States Diabetes 0.007983 0.489566 8.082904e-03 ARIMA
United States Life expectancy 1.996910 1.561422 1.102021e+00 RF
📋 Forecasts for Selected Years:
Country Target Year ARIMA_RMSE ARIMA_Forecast Prophet_RMSE Prophet_Forecast RF_RMSE RF_Forecast
Bangladesh Cardiovascular diseases 2025 1.175582 30.440474 6.991238 22.343115 0.410136 27.854558
Bangladesh Cardiovascular diseases 2030 1.175582 31.940794 6.991238 24.463361 0.410136 27.854558
Bangladesh Cardiovascular diseases 2040 1.175582 34.260180 6.991238 28.785528 0.410136 27.854558
Bangladesh Cardiovascular diseases 2050 1.175582 35.897214 6.991238 32.425885 0.410136 27.854558
Bangladesh Cardiovascular diseases 2060 1.175582 37.052640 6.991238 36.748052 0.410136 27.854558
Bangladesh Diabetes 2025 0.000036 9.800033 2.987844 6.593908 0.103341 9.689000
Bangladesh Diabetes 2030 0.000036 9.800026 2.987844 6.364059 0.103341 9.689000
Bangladesh Diabetes 2040 0.000036 9.800027 2.987844 5.788692 0.103341 9.689000
Bangladesh Diabetes 2050 0.000036 9.800027 2.987844 5.211562 0.103341 9.689000
Bangladesh Diabetes 2060 0.000036 9.800027 2.987844 4.636195 0.103341 9.689000
Bangladesh Life expectancy 2025 2.312728 71.671340 1.676697 76.010260 3.592230 69.439186
Bangladesh Life expectancy 2030 2.312728 71.671358 1.676697 77.911348 3.592230 69.439186
Bangladesh Life expectancy 2040 2.312728 71.671358 1.676697 83.766696 3.592230 69.439186
Bangladesh Life expectancy 2050 2.312728 71.671358 1.676697 88.736171 3.592230 69.439186
Bangladesh Life expectancy 2060 2.312728 71.671358 1.676697 94.591519 3.592230 69.439186
Brazil Cardiovascular diseases 2025 1.819507 37.512576 6.547227 34.048560 2.320450 36.784485
Brazil Cardiovascular diseases 2030 1.819507 38.026433 6.547227 37.240321 2.320450 36.784485
Brazil Cardiovascular diseases 2040 1.819507 38.724635 6.547227 43.801768 2.320450 36.784485
Brazil Cardiovascular diseases 2050 1.819507 39.136841 6.547227 49.309747 2.320450 36.784485
Brazil Cardiovascular diseases 2060 1.819507 39.380199 6.547227 55.871194 2.320450 36.784485
Brazil Diabetes 2025 0.000000 8.300000 0.186005 8.233562 0.032731 8.290000
Brazil Diabetes 2030 0.000000 8.300000 0.186005 8.506609 0.032731 8.290000
Brazil Diabetes 2040 0.000000 8.300000 0.186005 8.908965 0.032731 8.290000
Brazil Diabetes 2050 0.000000 8.300000 0.186005 9.428119 0.032731 8.290000
Brazil Diabetes 2060 0.000000 8.300000 0.186005 9.830475 0.032731 8.290000
Brazil Life expectancy 2025 3.009573 69.422619 2.189554 77.316414 1.174190 75.387073
Brazil Life expectancy 2030 3.009573 65.412065 2.189554 78.606403 1.174190 75.387073
Brazil Life expectancy 2040 3.009573 59.751236 2.189554 81.065716 1.174190 75.387073
Brazil Life expectancy 2050 3.009573 56.227383 2.189554 83.708557 1.174190 75.387073
Brazil Life expectancy 2060 3.009573 54.033793 2.189554 86.167871 1.174190 75.387073
# Summary of Best Models per Country and Target
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
import warnings
import logging
# === Output hygiene
# Model fitting emits many convergence / frequency warnings; they are suppressed
# globally for this run.
warnings.filterwarnings("ignore")
logging.getLogger('statsmodels').setLevel(logging.ERROR)
# Prophet and its cmdstanpy backend log one INFO/DEBUG burst per fitted model,
# which buries the result tables. Keep warnings and errors only.
# NOTE(review): this must run after `prophet` has been imported, since
# cmdstanpy configures its own logger on first import — confirm cell order.
logging.getLogger('prophet').setLevel(logging.WARNING)
logging.getLogger('cmdstanpy').setLevel(logging.WARNING)

# === Time Ranges
# Models are trained on [start_train, end_train], scored on eval_years and
# then projected over forecast_horizon (2024..2074 inclusive).
start_train = 1950
end_train = 2020
eval_years = [2021, 2022, 2023]
forecast_horizon = list(range(2024, 2075))

# === Input Variables
selected_countries = [
    'United States', 'Germany', 'Japan', 'Brazil', 'India',
    'Indonesia', 'Nigeria', 'Kenya', 'Mexico', 'Bangladesh',
]
target_columns = ['Life expectancy', 'Cardiovascular diseases', 'Diabetes']
# Feature columns used by the Random Forest model, keyed by target column.
# Names ending in `_lagN` are presumably N-year lagged copies of the base
# column, built earlier in the notebook — verify against the feature step.
selected_features_dict = {
    'Life expectancy': [
        'Child mortality rate', 'Median age', 'Incomplete tertiary education_lag3',
        'GDP', 'Sex ratio', 'Gini coefficient_lag2', 'Gini coefficient_lag3',
        'Sex ratio_lag2', 'Sex ratio_lag3', 'Sex ratio_lag1',
        'Gini coefficient', 'Incomplete tertiary education', 'Inflation',
    ],
    'Cardiovascular diseases': [
        'GDP', 'Gini coefficient_lag3', 'BMI_avg_lag3', 'Median age_lag3',
    ],
    'Diabetes': [
        'BMI_avg_lag3', 'Income', 'Income_lag1', 'Median age',
        'Income_lag3', 'Incomplete tertiary education', 'Income_lag2',
        'Child mortality rate', 'Sex ratio', 'Gini coefficient',
    ],
}
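# === Input Dataset (must already be loaded)
# df_forecast_ready must be defined before this point: a DataFrame with 'Country' and 'Year' columns plus the target and feature columns named above.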
# === Forecasting and Evaluation
# For each (country, target) pair: fit ARIMA, Prophet and Random Forest on the
# training window, score each on the evaluation rows (RMSE), then append one
# summary row per forecast-horizon year present in the data.
# Every model is best-effort: a failure is logged and leaves that model's
# RMSE / forecasts as None, so the remaining models are still reported.
forecast_logger = logging.getLogger(__name__)
forecast_summary = []
for country in selected_countries:
    df_country = df_forecast_ready[df_forecast_ready['Country'] == country].sort_values('Year')

    # The year masks do not depend on the target, so build them once per country.
    train_mask = df_country['Year'].between(start_train, end_train)
    eval_mask = df_country['Year'].isin(eval_years)
    forecast_mask = df_country['Year'].isin(forecast_horizon)
    df_train = df_country[train_mask]
    df_eval = df_country[eval_mask]
    df_forecast = df_country[forecast_mask]
    n_train, n_eval, n_forecast = len(df_train), len(df_eval), len(df_forecast)

    for target in target_columns:
        if target not in df_country.columns:
            continue
        features = selected_features_dict.get(target, [])
        available_features = [f for f in features if f in df_country.columns]
        if not available_features:
            continue
        actual_eval = df_eval[target].values

        #### ARIMA ####
        arima_rmse, arima_forecast = None, [None] * n_forecast
        try:
            # Fit on the raw value array and forecast by position. No date
            # index is attached, so the result does not depend on a hard-coded
            # start year or on a pandas-version-specific frequency alias.
            # Steps n_train.. are matched to the evaluation rows and the steps
            # after those to the forecast rows; this assumes the rows are
            # consecutive years with no gaps.
            arima_fit = ARIMA(df_train[target].to_numpy(), order=(1, 1, 1)).fit()
            pred_eval = arima_fit.predict(start=n_train, end=n_train + n_eval - 1)
            arima_rmse = np.sqrt(mean_squared_error(actual_eval, pred_eval))
            if n_forecast:
                first_step = n_train + n_eval
                arima_forecast = arima_fit.predict(
                    start=first_step, end=first_step + n_forecast - 1
                ).tolist()
        except Exception as exc:
            forecast_logger.warning("ARIMA failed for %s / %s: %s", country, target, exc)

        #### Prophet ####
        prophet_rmse, prophet_forecast = None, [None] * n_forecast
        try:
            prophet_df = df_train[['Year', target]].rename(columns={'Year': 'ds', target: 'y'})
            prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%Y')
            prophet_model = Prophet()
            prophet_model.fit(prophet_df)
            # Score on the evaluation years actually present for this country
            # so predictions line up one-to-one with actual_eval.
            eval_dates = pd.DataFrame({'ds': pd.to_datetime(df_eval['Year'], format='%Y')})
            forecast_eval = prophet_model.predict(eval_dates)
            prophet_rmse = np.sqrt(mean_squared_error(actual_eval, forecast_eval['yhat'].values))
            if n_forecast:
                forecast_dates = pd.DataFrame({'ds': pd.to_datetime(df_forecast['Year'], format='%Y')})
                prophet_forecast = prophet_model.predict(forecast_dates)['yhat'].tolist()
        except Exception as exc:
            forecast_logger.warning("Prophet failed for %s / %s: %s", country, target, exc)

        #### Random Forest ####
        rf_rmse, rf_forecast = None, [None] * n_forecast
        try:
            feature_frame = df_country[available_features]
            target_series = df_country[target]
            rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
            rf_model.fit(feature_frame[train_mask], target_series[train_mask])
            pred_eval = rf_model.predict(feature_frame[eval_mask])
            rf_rmse = np.sqrt(mean_squared_error(target_series[eval_mask], pred_eval))
            X_forecast = feature_frame[forecast_mask]
            # Forecast only when every feature value is present for every
            # forecast row; otherwise the RF forecasts stay None.
            if n_forecast and not X_forecast.isnull().any(axis=1).any():
                rf_forecast = rf_model.predict(X_forecast).tolist()
        except Exception as exc:
            forecast_logger.warning("Random Forest failed for %s / %s: %s", country, target, exc)

        # One output row per forecast year; the RMSE values repeat on each row.
        for i, year in enumerate(df_forecast['Year']):
            forecast_summary.append({
                "Country": country,
                "Target": target,
                "Year": year,
                "ARIMA_RMSE": arima_rmse,
                "ARIMA_Forecast": arima_forecast[i],
                "Prophet_RMSE": prophet_rmse,
                "Prophet_Forecast": prophet_forecast[i],
                "RF_RMSE": rf_rmse,
                "RF_Forecast": rf_forecast[i],
            })
# === Combine All Results
# Turn the collected per-year dicts into a single DataFrame.
df_model_comparison = pd.DataFrame(forecast_summary)

# === Summary Table: Best Model by RMSE
# Keep one RMSE triple per (Country, Target) pair: the first value found in
# each group for every RMSE column.
rmse_columns = ['ARIMA_RMSE', 'Prophet_RMSE', 'RF_RMSE']
pair_groups = df_model_comparison.groupby(['Country', 'Target'])
summary_table = pair_groups[rmse_columns].first().reset_index()
def best_model_picker(row):
    """Return the name of the model with the lowest RMSE in ``row``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing the keys
            'ARIMA_RMSE', 'Prophet_RMSE' and 'RF_RMSE'.

    Returns:
        'ARIMA', 'Prophet' or 'RF' for the smallest non-missing RMSE. Ties go
        to the first model in that order. Returns None when every RMSE is
        missing (None/NaN), so a group where no model could be fitted is not
        reported as having a winner.
    """
    scores = {
        'ARIMA': row['ARIMA_RMSE'],
        'Prophet': row['Prophet_RMSE'],
        'RF': row['RF_RMSE'],
    }
    # Drop models that failed (RMSE left as None/NaN) before comparing.
    valid_scores = {name: rmse for name, rmse in scores.items() if pd.notnull(rmse)}
    if not valid_scores:
        return None
    return min(valid_scores, key=valid_scores.get)
# Label each (Country, Target) row with its lowest-RMSE model.
summary_table['🎯 Best_Model'] = summary_table.apply(best_model_picker, axis=1)
# === Display Results
print("\n📊 Summary of Best Models per Country and Target:\n")
summary_columns = ['Country', 'Target', 'ARIMA_RMSE', 'Prophet_RMSE', 'RF_RMSE', '🎯 Best_Model']
print(summary_table[summary_columns].to_string(index=False))
# === Optional Preview of Forecasts
sample_years = [2025, 2030, 2040, 2050, 2060, 2074]
in_sample_years = df_model_comparison['Year'].isin(sample_years)
df_sample = df_model_comparison[in_sample_years].sort_values(['Country', 'Target', 'Year'])
print("\n📋 Forecasts for Selected Years:\n")
# Only the first 30 rows are shown to keep the preview short.
print(df_sample.head(30).to_string(index=False))
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0qc7e0bs.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/m35fzw2w.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=84455', 'data', 'file=/tmp/tmpfus6j9tk/0qc7e0bs.json', 'init=/tmp/tmpfus6j9tk/m35fzw2w.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model6fv33pfk/prophet_model-20250714225718.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:18 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:19 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/gy1axum8.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lqitem6u.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=35761', 'data', 'file=/tmp/tmpfus6j9tk/gy1axum8.json', 'init=/tmp/tmpfus6j9tk/lqitem6u.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0ozhwsji/prophet_model-20250714225720.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:20 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:20 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. 
Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/j1giflub.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0m0z7av0.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=24065', 'data', 'file=/tmp/tmpfus6j9tk/j1giflub.json', 'init=/tmp/tmpfus6j9tk/0m0z7av0.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model6rcaqtqg/prophet_model-20250714225721.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:21 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:21 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/hpf4h3ce.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/r7p1rsmi.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23811', 'data', 'file=/tmp/tmpfus6j9tk/hpf4h3ce.json', 'init=/tmp/tmpfus6j9tk/r7p1rsmi.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelnfx0_c5_/prophet_model-20250714225721.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:21 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:22 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. 
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/a3kolotr.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/f7z_d3wj.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=96627', 'data', 'file=/tmp/tmpfus6j9tk/a3kolotr.json', 'init=/tmp/tmpfus6j9tk/f7z_d3wj.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelw973279t/prophet_model-20250714225722.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:22 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:23 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/gmwq5_1r.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/9elo61af.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=52920', 'data', 'file=/tmp/tmpfus6j9tk/gmwq5_1r.json', 'init=/tmp/tmpfus6j9tk/9elo61af.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelne3uqqzn/prophet_model-20250714225724.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:24 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:24 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. 
Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/d38q01v9.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/029fr5nj.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=9016', 'data', 'file=/tmp/tmpfus6j9tk/d38q01v9.json', 'init=/tmp/tmpfus6j9tk/029fr5nj.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelca5m3a77/prophet_model-20250714225728.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:28 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:29 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/o7lfen4x.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/txr8dabp.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=33931', 'data', 'file=/tmp/tmpfus6j9tk/o7lfen4x.json', 'init=/tmp/tmpfus6j9tk/txr8dabp.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0un998vp/prophet_model-20250714225731.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:31 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:31 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2mys6yok.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/l4y4f677.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=51034', 'data', 'file=/tmp/tmpfus6j9tk/2mys6yok.json', 'init=/tmp/tmpfus6j9tk/l4y4f677.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeled436ljs/prophet_model-20250714225732.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:32 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:33 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/30g122ip.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/hvu1xqsn.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=10548', 'data', 'file=/tmp/tmpfus6j9tk/30g122ip.json', 'init=/tmp/tmpfus6j9tk/hvu1xqsn.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelgiin6wqa/prophet_model-20250714225734.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:34 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:35 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/dv14qhwy.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/y39lcg5q.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=5865', 'data', 'file=/tmp/tmpfus6j9tk/dv14qhwy.json', 'init=/tmp/tmpfus6j9tk/y39lcg5q.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model584_ltcb/prophet_model-20250714225736.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:36 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:37 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0yzcukga.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/vnt05n63.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=5870', 'data', 'file=/tmp/tmpfus6j9tk/0yzcukga.json', 'init=/tmp/tmpfus6j9tk/vnt05n63.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model8____7r_/prophet_model-20250714225738.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:38 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:39 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ne04pbyd.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/rtb6uoka.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=35413', 'data', 'file=/tmp/tmpfus6j9tk/ne04pbyd.json', 'init=/tmp/tmpfus6j9tk/rtb6uoka.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model81nvvpjh/prophet_model-20250714225742.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:42 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:43 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/vu3pxf9q.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ekrgoz0a.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=24993', 'data', 'file=/tmp/tmpfus6j9tk/vu3pxf9q.json', 'init=/tmp/tmpfus6j9tk/ekrgoz0a.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeltzlicmys/prophet_model-20250714225744.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:44 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:45 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ucw2mhto.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/yxzod5gm.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=60916', 'data', 'file=/tmp/tmpfus6j9tk/ucw2mhto.json', 'init=/tmp/tmpfus6j9tk/yxzod5gm.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelwpcxmp_p/prophet_model-20250714225746.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:46 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:47 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/d9hwb1dp.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ivi73c1p.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=60760', 'data', 'file=/tmp/tmpfus6j9tk/d9hwb1dp.json', 'init=/tmp/tmpfus6j9tk/ivi73c1p.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelw0uh3vrp/prophet_model-20250714225749.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:49 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:50 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lz6_2a4c.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/40ja2yxw.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=60917', 'data', 'file=/tmp/tmpfus6j9tk/lz6_2a4c.json', 'init=/tmp/tmpfus6j9tk/40ja2yxw.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelenvlprf4/prophet_model-20250714225754.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:54 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:56 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/y5n1vw7z.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/b_z424ee.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=99490', 'data', 'file=/tmp/tmpfus6j9tk/y5n1vw7z.json', 'init=/tmp/tmpfus6j9tk/b_z424ee.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modele7rf79_q/prophet_model-20250714225757.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:57 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:58 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/pzi5chpx.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/rnet1ua5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=16460', 'data', 'file=/tmp/tmpfus6j9tk/pzi5chpx.json', 'init=/tmp/tmpfus6j9tk/rnet1ua5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelayswbtba/prophet_model-20250714225758.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:57:58 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:57:59 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/v9cvp15z.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/oo8aw8dx.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=2721', 'data', 'file=/tmp/tmpfus6j9tk/v9cvp15z.json', 'init=/tmp/tmpfus6j9tk/oo8aw8dx.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelxfh2568i/prophet_model-20250714225800.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:00 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:00 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_hilpzhj.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/q4lqsn7f.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=77690', 'data', 'file=/tmp/tmpfus6j9tk/_hilpzhj.json', 'init=/tmp/tmpfus6j9tk/q4lqsn7f.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model2nun1cwl/prophet_model-20250714225802.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:02 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:03 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/qqw1xnou.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5090odv2.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=10932', 'data', 'file=/tmp/tmpfus6j9tk/qqw1xnou.json', 'init=/tmp/tmpfus6j9tk/5090odv2.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelcyze5m4k/prophet_model-20250714225804.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:04 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:04 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/8wcrqnty.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ithb4k1b.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=11002', 'data', 'file=/tmp/tmpfus6j9tk/8wcrqnty.json', 'init=/tmp/tmpfus6j9tk/ithb4k1b.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0nd5nx9r/prophet_model-20250714225805.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:05 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:05 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/p1i6p9c6.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/tj684117.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=90891', 'data', 'file=/tmp/tmpfus6j9tk/p1i6p9c6.json', 'init=/tmp/tmpfus6j9tk/tj684117.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelm8u59hvg/prophet_model-20250714225807.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:07 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:08 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/uwx2tmv4.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/j2587fb8.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23592', 'data', 'file=/tmp/tmpfus6j9tk/uwx2tmv4.json', 'init=/tmp/tmpfus6j9tk/j2587fb8.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelx8wiod9_/prophet_model-20250714225809.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:09 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:09 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/hnp9ckyh.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/735avjbg.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=53694', 'data', 'file=/tmp/tmpfus6j9tk/hnp9ckyh.json', 'init=/tmp/tmpfus6j9tk/735avjbg.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelkjewstsn/prophet_model-20250714225810.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:10 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:10 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/bkeyybji.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/mqtzm11b.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=8014', 'data', 'file=/tmp/tmpfus6j9tk/bkeyybji.json', 'init=/tmp/tmpfus6j9tk/mqtzm11b.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelc5jhpzvi/prophet_model-20250714225810.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:10 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:11 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/znbj59i9.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/__32kyde.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=13718', 'data', 'file=/tmp/tmpfus6j9tk/znbj59i9.json', 'init=/tmp/tmpfus6j9tk/__32kyde.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelahfffles/prophet_model-20250714225812.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:12 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:13 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/potsgkqs.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/u8146kk1.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=12034', 'data', 'file=/tmp/tmpfus6j9tk/potsgkqs.json', 'init=/tmp/tmpfus6j9tk/u8146kk1.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model527ql_n2/prophet_model-20250714225813.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:13 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:13 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7noqkwx8.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/825lzokb.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=5693', 'data', 'file=/tmp/tmpfus6j9tk/7noqkwx8.json', 'init=/tmp/tmpfus6j9tk/825lzokb.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelocolcpwx/prophet_model-20250714225813.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:58:13 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:58:14 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
📊 Summary of Best Models per Country and Target:
Country Target ARIMA_RMSE Prophet_RMSE RF_RMSE 🎯 Best_Model
Bangladesh Cardiovascular diseases 1.175582 6.991238 4.101364e-01 RF
Bangladesh Diabetes 0.000036 2.987844 1.033409e-01 ARIMA
Bangladesh Life expectancy 2.312728 1.676697 3.592230e+00 Prophet
Brazil Cardiovascular diseases 1.819507 6.547227 2.320450e+00 ARIMA
Brazil Diabetes 0.000000 0.186005 3.273123e-02 ARIMA
Brazil Life expectancy 3.009573 2.189554 1.174190e+00 RF
Germany Cardiovascular diseases 0.433925 2.125500 8.295614e-01 ARIMA
Germany Diabetes 0.000000 2.758175 0.000000e+00 ARIMA
Germany Life expectancy 0.474573 0.612408 3.493298e-01 RF
India Cardiovascular diseases 19.662985 37.420988 1.460423e+01 RF
India Diabetes 0.019744 0.830592 3.500000e-02 ARIMA
India Life expectancy 1.973657 2.475751 2.772311e+00 ARIMA
Indonesia Cardiovascular diseases 8.486563 7.998086 7.366761e+00 RF
Indonesia Diabetes 0.000000 0.712114 9.000000e-03 ARIMA
Indonesia Life expectancy 1.887179 1.692886 1.790459e+00 Prophet
Japan Cardiovascular diseases 1.547668 7.688441 2.570900e+00 ARIMA
Japan Diabetes 0.000000 1.841061 4.440892e-15 ARIMA
Japan Life expectancy 0.638746 0.576474 2.755038e-01 RF
Kenya Cardiovascular diseases 0.121752 0.933468 3.225326e-01 ARIMA
Kenya Diabetes 0.000379 3.479734 7.000000e-03 ARIMA
Kenya Life expectancy 3.235337 1.670562 8.420547e-01 RF
Mexico Cardiovascular diseases 0.578806 0.843693 2.413675e+00 ARIMA
Mexico Diabetes 0.000000 0.799705 2.718455e-02 ARIMA
Mexico Life expectancy 6.224500 2.428620 2.518455e+00 Prophet
Nigeria Cardiovascular diseases 0.716350 4.498448 1.724021e+00 ARIMA
Nigeria Diabetes 0.000000 0.140798 4.000000e-03 ARIMA
Nigeria Life expectancy 0.700330 0.369290 6.424644e+00 Prophet
United States Cardiovascular diseases 1.190369 11.974926 4.308573e+00 ARIMA
United States Diabetes 0.007983 0.489566 8.082904e-03 ARIMA
United States Life expectancy 1.996910 1.561422 1.102021e+00 RF
📋 Forecasts for Selected Years:
Country Target Year ARIMA_RMSE ARIMA_Forecast Prophet_RMSE Prophet_Forecast RF_RMSE RF_Forecast
Bangladesh Cardiovascular diseases 2025 1.175582 30.440474 6.991238 22.343115 0.410136 27.854558
Bangladesh Cardiovascular diseases 2030 1.175582 31.940794 6.991238 24.463361 0.410136 27.854558
Bangladesh Cardiovascular diseases 2040 1.175582 34.260180 6.991238 28.785528 0.410136 27.854558
Bangladesh Cardiovascular diseases 2050 1.175582 35.897214 6.991238 32.425885 0.410136 27.854558
Bangladesh Cardiovascular diseases 2060 1.175582 37.052640 6.991238 36.748052 0.410136 27.854558
Bangladesh Diabetes 2025 0.000036 9.800033 2.987844 6.593908 0.103341 9.689000
Bangladesh Diabetes 2030 0.000036 9.800026 2.987844 6.364059 0.103341 9.689000
Bangladesh Diabetes 2040 0.000036 9.800027 2.987844 5.788692 0.103341 9.689000
Bangladesh Diabetes 2050 0.000036 9.800027 2.987844 5.211562 0.103341 9.689000
Bangladesh Diabetes 2060 0.000036 9.800027 2.987844 4.636195 0.103341 9.689000
Bangladesh Life expectancy 2025 2.312728 71.671340 1.676697 76.010260 3.592230 69.439186
Bangladesh Life expectancy 2030 2.312728 71.671358 1.676697 77.911348 3.592230 69.439186
Bangladesh Life expectancy 2040 2.312728 71.671358 1.676697 83.766696 3.592230 69.439186
Bangladesh Life expectancy 2050 2.312728 71.671358 1.676697 88.736171 3.592230 69.439186
Bangladesh Life expectancy 2060 2.312728 71.671358 1.676697 94.591519 3.592230 69.439186
Brazil Cardiovascular diseases 2025 1.819507 37.512576 6.547227 34.048560 2.320450 36.784485
Brazil Cardiovascular diseases 2030 1.819507 38.026433 6.547227 37.240321 2.320450 36.784485
Brazil Cardiovascular diseases 2040 1.819507 38.724635 6.547227 43.801768 2.320450 36.784485
Brazil Cardiovascular diseases 2050 1.819507 39.136841 6.547227 49.309747 2.320450 36.784485
Brazil Cardiovascular diseases 2060 1.819507 39.380199 6.547227 55.871194 2.320450 36.784485
Brazil Diabetes 2025 0.000000 8.300000 0.186005 8.233562 0.032731 8.290000
Brazil Diabetes 2030 0.000000 8.300000 0.186005 8.506609 0.032731 8.290000
Brazil Diabetes 2040 0.000000 8.300000 0.186005 8.908965 0.032731 8.290000
Brazil Diabetes 2050 0.000000 8.300000 0.186005 9.428119 0.032731 8.290000
Brazil Diabetes 2060 0.000000 8.300000 0.186005 9.830475 0.032731 8.290000
Brazil Life expectancy 2025 3.009573 69.422619 2.189554 77.316414 1.174190 75.387073
Brazil Life expectancy 2030 3.009573 65.412065 2.189554 78.606403 1.174190 75.387073
Brazil Life expectancy 2040 3.009573 59.751236 2.189554 81.065716 1.174190 75.387073
Brazil Life expectancy 2050 3.009573 56.227383 2.189554 83.708557 1.174190 75.387073
Brazil Life expectancy 2060 3.009573 54.033793 2.189554 86.167871 1.174190 75.387073
# Evaluation metrics (RMSE, MAPE, R²)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
def calculate_metrics(actual, predicted):
    """Return (RMSE, MAPE, R²) for a set of predictions.

    Parameters
    ----------
    actual : array-like
        Observed values.
    predicted : array-like
        Predicted values, in the same order and of the same length as
        ``actual``.

    Returns
    -------
    tuple
        ``(rmse, mape, r2)``: RMSE and R² rounded to 4 decimals, MAPE (in
        percent) rounded to 2 decimals. MAPE is NaN when every actual value
        is zero, because a percentage error is undefined there.

    Raises
    ------
    ValueError
        Propagated from scikit-learn when the two inputs differ in length.
    """
    # Work on flat float arrays so pandas index labels (e.g. a forecast
    # Series with a DatetimeIndex) can never trigger label alignment or
    # broadcasting in the element-wise arithmetic below.
    actual = np.asarray(actual, dtype=float).ravel()
    predicted = np.asarray(predicted, dtype=float).ravel()

    rmse = np.sqrt(mean_squared_error(actual, predicted))
    r2 = r2_score(actual, predicted)

    # Percentage error is undefined where the actual value is 0; leave those
    # points out instead of letting inf/NaN poison the mean.
    nonzero = actual != 0
    if nonzero.any():
        pct_errors = np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
        mape = np.mean(pct_errors) * 100
    else:
        mape = np.nan

    return round(rmse, 4), round(mape, 2), round(r2, 4)
# Collectors filled by the evaluation loop below.
metrics_summary = []  # one dict per (country, target, model): RMSE, MAPE, R²
eval_results = []  # one frame per (country, target): Random Forest prediction vs. actual
# Evaluation years
eval_years = [2021, 2022, 2023]
# Inclusive training window shared by all three models.
train_start_year, train_end_year = 1950, 2020


def _report_model_failure(model_name, country, target, exc):
    """Print why a model was skipped instead of dropping the error silently."""
    print(f"[evaluation] {model_name} skipped for {country} / {target}: "
          f"{type(exc).__name__}: {exc}")


for country in selected_countries:
    # Sort chronologically so the time-series models see ordered data and the
    # hold-out values line up with their years.
    df_country = df_forecast_ready[df_forecast_ready['Country'] == country].sort_values('Year')
    train_mask = df_country['Year'].between(train_start_year, train_end_year)
    eval_mask = df_country['Year'].isin(eval_years)
    # Years actually present in the hold-out rows (used to label RF predictions).
    eval_year_values = df_country.loc[eval_mask, 'Year'].values

    for target in target_columns:
        if target not in df_country.columns:
            continue
        actual = df_country.loc[eval_mask, target].values
        if len(actual) == 0:
            # Nothing to score against; every model would fail on this pair.
            print(f"[evaluation] no hold-out rows for {country} / {target}; skipped")
            continue

        # --- ARIMA ---
        # Best effort: a failing model is reported and skipped. Catching
        # Exception (not a bare except) lets KeyboardInterrupt stop the run.
        try:
            train_series = df_country.loc[train_mask, [target]]
            train_series.index = pd.date_range(start='1950', periods=len(train_series), freq='YE')
            model_arima = ARIMA(train_series, order=(1, 1, 1)).fit()
            # Out-of-sample positions: one step per evaluation year.
            arima_pred = model_arima.predict(
                start=len(train_series),
                end=len(train_series) + len(eval_years) - 1,
            )
            arima_rmse, arima_mape, arima_r2 = calculate_metrics(actual, arima_pred)
            metrics_summary.append({
                "Country": country, "Target": target, "Model": "ARIMA",
                "RMSE": arima_rmse, "MAPE": arima_mape, "R²": arima_r2
            })
        except Exception as exc:
            _report_model_failure("ARIMA", country, target, exc)

        # --- Prophet ---
        try:
            prophet_df = df_country.loc[train_mask, ['Year', target]].rename(
                columns={'Year': 'ds', target: 'y'}
            )
            prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%Y')
            model_prophet = Prophet()
            model_prophet.fit(prophet_df)
            future_eval = pd.DataFrame({'ds': pd.to_datetime(eval_years, format='%Y')})
            prophet_pred = model_prophet.predict(future_eval)['yhat'].values
            prophet_rmse, prophet_mape, prophet_r2 = calculate_metrics(actual, prophet_pred)
            metrics_summary.append({
                "Country": country, "Target": target, "Model": "Prophet",
                "RMSE": prophet_rmse, "MAPE": prophet_mape, "R²": prophet_r2
            })
        except Exception as exc:
            _report_model_failure("Prophet", country, target, exc)

        # --- Random Forest ---
        try:
            features = selected_features_dict.get(target, [])
            # Use only the selected features that exist for this country.
            available = [f for f in features if f in df_country.columns]
            X = df_country[available]
            y = df_country[target]
            X_train = X[train_mask]
            y_train = y[train_mask]
            X_eval = X[eval_mask]
            model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
            model_rf.fit(X_train, y_train)
            rf_pred = model_rf.predict(X_eval)
            rf_rmse, rf_mape, rf_r2 = calculate_metrics(actual, rf_pred)
            # Keep the row-level predictions; years come from the data so each
            # prediction is labelled with the year of the row it was made for.
            eval_rows = pd.DataFrame({
                "Country": [country] * len(eval_year_values),
                "Target": [target] * len(eval_year_values),
                "Year": eval_year_values,
                "Prediction": rf_pred,
                "Actual": actual
            })
            eval_results.append(eval_rows)
            metrics_summary.append({
                "Country": country, "Target": target, "Model": "Random Forest",
                "RMSE": rf_rmse, "MAPE": rf_mape, "R²": rf_r2
            })
        except Exception as exc:
            _report_model_failure("Random Forest", country, target, exc)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the expected columns when no Random Forest evaluation succeeded.
if eval_results:
    df_eval_pred = pd.concat(eval_results, ignore_index=True)
else:
    df_eval_pred = pd.DataFrame(columns=["Country", "Target", "Year", "Prediction", "Actual"])
def pick_best_model(group):
    """Return the 'Model' value of the row in *group* with the lowest 'RMSE'.

    *group* is a DataFrame with 'RMSE' and 'Model' columns; on ties the first
    row holding the minimum wins.
    """
    rmse_values = group['RMSE']
    best_row_label = rmse_values.idxmin()
    best_model = group.loc[best_row_label, 'Model']
    return best_model
# Convert the collected metrics to a DataFrame. Columns are named explicitly so
# the frame keeps its schema even when no model produced metrics; an empty
# list would otherwise give a column-less frame and the sort below would raise
# a KeyError.
metric_columns = ['Country', 'Target', 'Model', 'RMSE', 'MAPE', 'R²']
df_metrics = pd.DataFrame(metrics_summary, columns=metric_columns)
# Sort it and assign it to df_metrics_sorted
df_metrics_sorted = df_metrics.sort_values(['Country', 'Target', 'Model']).reset_index(drop=True)
# Label every row with the lowest-RMSE model of its (Country, Target) group.
# pick_best_model is already defined right before this section, so it is
# reused here rather than redefined a second time.
if df_metrics_sorted.empty:
    df_metrics_sorted['Best_Model'] = pd.Series(dtype=object)
else:
    df_metrics_sorted['Best_Model'] = df_metrics_sorted.groupby(['Country', 'Target'])['RMSE'].transform(
        # rmse.index holds the row labels of one group; hand those rows to the picker.
        lambda rmse: pick_best_model(df_metrics_sorted.loc[rmse.index])
    )
# Display full table
print("\n🎯 Step 20: Evaluation Summary with Best Model\n")
print(df_metrics_sorted[['Country', 'Target', 'Model', 'RMSE', 'MAPE', 'R²', 'Best_Model']].to_string(index=False))
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/buxlhuvh.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/uvo9z782.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=31249', 'data', 'file=/tmp/tmpfus6j9tk/buxlhuvh.json', 'init=/tmp/tmpfus6j9tk/uvo9z782.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelxwnaswnk/prophet_model-20250714225937.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:37 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:37 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2femmil4.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3nc2sva5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=42250', 'data', 'file=/tmp/tmpfus6j9tk/2femmil4.json', 'init=/tmp/tmpfus6j9tk/3nc2sva5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelbtwm7qpg/prophet_model-20250714225937.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:37 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:38 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. 
Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/cmi40jh5.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2m312ue6.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=69678', 'data', 'file=/tmp/tmpfus6j9tk/cmi40jh5.json', 'init=/tmp/tmpfus6j9tk/2m312ue6.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model5pyqgb6v/prophet_model-20250714225938.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:38 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:38 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/f8w9umf9.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0z99rbmb.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=79320', 'data', 'file=/tmp/tmpfus6j9tk/f8w9umf9.json', 'init=/tmp/tmpfus6j9tk/0z99rbmb.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelotu8s_uz/prophet_model-20250714225939.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:39 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:40 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. 
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/rhz_g9z5.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5ya3r8is.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=39490', 'data', 'file=/tmp/tmpfus6j9tk/rhz_g9z5.json', 'init=/tmp/tmpfus6j9tk/5ya3r8is.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model2hvpzuvi/prophet_model-20250714225940.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:40 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:41 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/om7g8w87.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2c5br7t6.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=38231', 'data', 'file=/tmp/tmpfus6j9tk/om7g8w87.json', 'init=/tmp/tmpfus6j9tk/2c5br7t6.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelfccscj9y/prophet_model-20250714225941.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:41 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:42 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. 
Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/i8pn71k3.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/01lzeehr.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=64712', 'data', 'file=/tmp/tmpfus6j9tk/i8pn71k3.json', 'init=/tmp/tmpfus6j9tk/01lzeehr.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_models9j3pjrn/prophet_model-20250714225942.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:42 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:43 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7uxx9mau.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/r9uzo3r1.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=38660', 'data', 'file=/tmp/tmpfus6j9tk/7uxx9mau.json', 'init=/tmp/tmpfus6j9tk/r9uzo3r1.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeliilrt_pv/prophet_model-20250714225944.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:44 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:44 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/zf70q7is.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/60l8lpq1.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=10285', 'data', 'file=/tmp/tmpfus6j9tk/zf70q7is.json', 'init=/tmp/tmpfus6j9tk/60l8lpq1.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeln2fro65z/prophet_model-20250714225944.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:44 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:45 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/h86a_iq4.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/84k0hw0m.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=50476', 'data', 'file=/tmp/tmpfus6j9tk/h86a_iq4.json', 'init=/tmp/tmpfus6j9tk/84k0hw0m.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model1auu0nlr/prophet_model-20250714225945.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:45 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:46 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3_fqi1e2.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/qk6yvpr7.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=73456', 'data', 'file=/tmp/tmpfus6j9tk/3_fqi1e2.json', 'init=/tmp/tmpfus6j9tk/qk6yvpr7.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelmgjs4em4/prophet_model-20250714225947.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:47 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:48 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/204mf2y1.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/srkoz9gd.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=35808', 'data', 'file=/tmp/tmpfus6j9tk/204mf2y1.json', 'init=/tmp/tmpfus6j9tk/srkoz9gd.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelzlbhrszd/prophet_model-20250714225949.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:49 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:49 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/raoyifpd.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/pjzhx_3j.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=41172', 'data', 'file=/tmp/tmpfus6j9tk/raoyifpd.json', 'init=/tmp/tmpfus6j9tk/pjzhx_3j.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelpl_8_9uk/prophet_model-20250714225949.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:49 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:50 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ajotdn62.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2lavqni2.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=66341', 'data', 'file=/tmp/tmpfus6j9tk/ajotdn62.json', 'init=/tmp/tmpfus6j9tk/2lavqni2.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0hc2ss0o/prophet_model-20250714225950.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:50 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:51 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/pdrg9_p9.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/70vq4pps.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=53354', 'data', 'file=/tmp/tmpfus6j9tk/pdrg9_p9.json', 'init=/tmp/tmpfus6j9tk/70vq4pps.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelj8lzgphi/prophet_model-20250714225951.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:51 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:52 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_pxoibk7.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/axoqh_bn.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=6221', 'data', 'file=/tmp/tmpfus6j9tk/_pxoibk7.json', 'init=/tmp/tmpfus6j9tk/axoqh_bn.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeliq8lcia8/prophet_model-20250714225953.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:53 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:53 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/stw_rwfo.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/py8l5s7t.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=52682', 'data', 'file=/tmp/tmpfus6j9tk/stw_rwfo.json', 'init=/tmp/tmpfus6j9tk/py8l5s7t.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modely7t56z4a/prophet_model-20250714225953.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:53 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:54 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/clunx9l7.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/y8lgu0im.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=85488', 'data', 'file=/tmp/tmpfus6j9tk/clunx9l7.json', 'init=/tmp/tmpfus6j9tk/y8lgu0im.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelv8mss_9z/prophet_model-20250714225954.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:54 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:54 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/bek3hwrl.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2vdwvwua.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=98631', 'data', 'file=/tmp/tmpfus6j9tk/bek3hwrl.json', 'init=/tmp/tmpfus6j9tk/2vdwvwua.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeli561ong9/prophet_model-20250714225955.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:55 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:56 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/yk9h1bi9.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/9mnxvjxy.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=37446', 'data', 'file=/tmp/tmpfus6j9tk/yk9h1bi9.json', 'init=/tmp/tmpfus6j9tk/9mnxvjxy.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelldoard43/prophet_model-20250714225957.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:57 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:57 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/giyjffzx.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fmco9t17.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=11236', 'data', 'file=/tmp/tmpfus6j9tk/giyjffzx.json', 'init=/tmp/tmpfus6j9tk/fmco9t17.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modellho3q6x4/prophet_model-20250714225957.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:57 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:57 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/gz7pi5b_.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/9f574h3c.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=19766', 'data', 'file=/tmp/tmpfus6j9tk/gz7pi5b_.json', 'init=/tmp/tmpfus6j9tk/9f574h3c.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model1sj4dzsn/prophet_model-20250714225958.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:58 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 22:59:58 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/yd16hhjc.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/50b6e1_5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=58382', 'data', 'file=/tmp/tmpfus6j9tk/yd16hhjc.json', 'init=/tmp/tmpfus6j9tk/50b6e1_5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelscq71nkg/prophet_model-20250714225959.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 22:59:59 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:00:00 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/nl551vno.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/d8qqb5cn.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=56760', 'data', 'file=/tmp/tmpfus6j9tk/nl551vno.json', 'init=/tmp/tmpfus6j9tk/d8qqb5cn.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelpp0etf2k/prophet_model-20250714230001.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:00:01 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:00:02 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5_y45ev7.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/eorc7e1r.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=56311', 'data', 'file=/tmp/tmpfus6j9tk/5_y45ev7.json', 'init=/tmp/tmpfus6j9tk/eorc7e1r.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeln929v8l9/prophet_model-20250714230002.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:00:02 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:00:03 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/kssa9m3z.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/qt5uiv0j.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=97915', 'data', 'file=/tmp/tmpfus6j9tk/kssa9m3z.json', 'init=/tmp/tmpfus6j9tk/qt5uiv0j.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modele8i2e_ez/prophet_model-20250714230005.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:00:05 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:00:06 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3ug7gr1s.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/xy_e51ee.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=37869', 'data', 'file=/tmp/tmpfus6j9tk/3ug7gr1s.json', 'init=/tmp/tmpfus6j9tk/xy_e51ee.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model2j87yrt9/prophet_model-20250714230007.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:00:07 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:00:08 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/j0tdqn83.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ia4e0bgs.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=31834', 'data', 'file=/tmp/tmpfus6j9tk/j0tdqn83.json', 'init=/tmp/tmpfus6j9tk/ia4e0bgs.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelp4ir049a/prophet_model-20250714230008.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:00:08 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:00:09 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/b2ivi5uu.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/kjzv4eum.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=13897', 'data', 'file=/tmp/tmpfus6j9tk/b2ivi5uu.json', 'init=/tmp/tmpfus6j9tk/kjzv4eum.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelxst2cvm4/prophet_model-20250714230010.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:00:10 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:00:10 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7j1gkm9r.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5450mvko.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=81742', 'data', 'file=/tmp/tmpfus6j9tk/7j1gkm9r.json', 'init=/tmp/tmpfus6j9tk/5450mvko.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelqvh265n6/prophet_model-20250714230011.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:00:11 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:00:13 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
🎯 Step 20: Evaluation Summary with Best Model
Country Target Model RMSE MAPE R² Best_Model
Bangladesh Cardiovascular diseases ARIMA 1.1756 4.02 -1.094927e+29 Random Forest
Bangladesh Cardiovascular diseases Prophet 6.9912 24.69 -3.872468e+30 Random Forest
Bangladesh Cardiovascular diseases Random Forest 0.4101 1.45 -1.332712e+28 Random Forest
Bangladesh Diabetes ARIMA 0.0000 0.00 0.000000e+00 ARIMA
Bangladesh Diabetes Prophet 2.9878 30.49 0.000000e+00 ARIMA
Bangladesh Diabetes Random Forest 0.1033 1.05 0.000000e+00 ARIMA
Bangladesh Life expectancy ARIMA 2.3127 2.76 -1.102500e+00 Prophet
Bangladesh Life expectancy Prophet 1.6767 1.89 -1.051000e-01 Prophet
Bangladesh Life expectancy Random Forest 3.5922 4.01 -4.072400e+00 Prophet
Brazil Cardiovascular diseases ARIMA 1.8195 4.66 0.000000e+00 ARIMA
Brazil Cardiovascular diseases Prophet 6.5472 16.73 0.000000e+00 ARIMA
Brazil Cardiovascular diseases Random Forest 2.3205 5.95 0.000000e+00 ARIMA
Brazil Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Brazil Diabetes Prophet 0.1860 2.14 0.000000e+00 ARIMA
Brazil Diabetes Random Forest 0.0327 0.35 0.000000e+00 ARIMA
Brazil Life expectancy ARIMA 3.0096 3.29 -5.672800e+00 Random Forest
Brazil Life expectancy Prophet 2.1896 2.66 -2.531900e+00 Random Forest
Brazil Life expectancy Random Forest 1.1742 1.25 -1.570000e-02 Random Forest
Germany Cardiovascular diseases ARIMA 0.4339 1.23 0.000000e+00 ARIMA
Germany Cardiovascular diseases Prophet 2.1255 5.82 0.000000e+00 ARIMA
Germany Cardiovascular diseases Random Forest 0.8296 2.34 0.000000e+00 ARIMA
Germany Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Germany Diabetes Prophet 2.7582 55.13 0.000000e+00 ARIMA
Germany Diabetes Random Forest 0.0000 0.00 1.000000e+00 ARIMA
Germany Life expectancy ARIMA 0.4746 0.44 -1.051900e+00 Random Forest
Germany Life expectancy Prophet 0.6124 0.65 -2.417000e+00 Random Forest
Germany Life expectancy Random Forest 0.3493 0.41 -1.118000e-01 Random Forest
India Cardiovascular diseases ARIMA 19.6630 6.68 0.000000e+00 Random Forest
India Cardiovascular diseases Prophet 37.4210 12.75 0.000000e+00 Random Forest
India Cardiovascular diseases Random Forest 14.6042 4.52 0.000000e+00 Random Forest
India Diabetes ARIMA 0.0197 0.21 0.000000e+00 ARIMA
India Diabetes Prophet 0.8306 9.49 0.000000e+00 ARIMA
India Diabetes Random Forest 0.0350 0.40 0.000000e+00 ARIMA
India Life expectancy ARIMA 1.9737 2.25 1.628000e-01 ARIMA
India Life expectancy Prophet 2.4758 2.42 -3.173000e-01 ARIMA
India Life expectancy Random Forest 2.7723 3.85 -6.518000e-01 ARIMA
Indonesia Cardiovascular diseases ARIMA 8.4866 11.75 0.000000e+00 Random Forest
Indonesia Cardiovascular diseases Prophet 7.9981 9.90 0.000000e+00 Random Forest
Indonesia Cardiovascular diseases Random Forest 7.3668 10.19 0.000000e+00 Random Forest
Indonesia Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Indonesia Diabetes Prophet 0.7121 9.24 0.000000e+00 ARIMA
Indonesia Diabetes Random Forest 0.0090 0.12 0.000000e+00 ARIMA
Indonesia Life expectancy ARIMA 1.8872 2.68 -2.444000e-01 Prophet
Indonesia Life expectancy Prophet 1.6929 1.48 -1.400000e-03 Prophet
Indonesia Life expectancy Random Forest 1.7905 2.56 -1.202000e-01 Prophet
Japan Cardiovascular diseases ARIMA 1.5477 3.73 0.000000e+00 ARIMA
Japan Cardiovascular diseases Prophet 7.6884 18.56 0.000000e+00 ARIMA
Japan Cardiovascular diseases Random Forest 2.5709 6.18 0.000000e+00 ARIMA
Japan Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Japan Diabetes Prophet 1.8411 27.47 0.000000e+00 ARIMA
Japan Diabetes Random Forest 0.0000 0.00 0.000000e+00 ARIMA
Japan Life expectancy ARIMA 0.6387 0.68 -4.204900e+00 Random Forest
Japan Life expectancy Prophet 0.5765 0.59 -3.239500e+00 Random Forest
Japan Life expectancy Random Forest 0.2755 0.30 3.170000e-02 Random Forest
Kenya Cardiovascular diseases ARIMA 0.1218 3.48 -7.516462e+28 ARIMA
Kenya Cardiovascular diseases Prophet 0.9335 26.66 -4.418335e+30 ARIMA
Kenya Cardiovascular diseases Random Forest 0.3225 9.22 -5.274808e+29 ARIMA
Kenya Diabetes ARIMA 0.0004 0.01 0.000000e+00 ARIMA
Kenya Diabetes Prophet 3.4797 57.98 0.000000e+00 ARIMA
Kenya Diabetes Random Forest 0.0070 0.12 0.000000e+00 ARIMA
Kenya Life expectancy ARIMA 3.2353 4.35 -7.360000e+00 Random Forest
Kenya Life expectancy Prophet 1.6706 2.25 -1.228900e+00 Random Forest
Kenya Life expectancy Random Forest 0.8421 1.34 4.337000e-01 Random Forest
Mexico Cardiovascular diseases ARIMA 0.5788 2.17 -2.654270e+28 ARIMA
Mexico Cardiovascular diseases Prophet 0.8437 3.08 -5.639601e+28 ARIMA
Mexico Cardiovascular diseases Random Forest 2.4137 10.91 -4.615694e+29 ARIMA
Mexico Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Mexico Diabetes Prophet 0.7997 7.13 -2.026747e+29 ARIMA
Mexico Diabetes Random Forest 0.0272 0.15 -2.341984e+26 ARIMA
Mexico Life expectancy ARIMA 6.2245 7.05 -6.367500e+00 Prophet
Mexico Life expectancy Prophet 2.4286 2.54 -1.216000e-01 Prophet
Mexico Life expectancy Random Forest 2.5185 3.37 -2.061000e-01 Prophet
Nigeria Cardiovascular diseases ARIMA 0.7164 3.98 0.000000e+00 ARIMA
Nigeria Cardiovascular diseases Prophet 4.4984 24.97 0.000000e+00 ARIMA
Nigeria Cardiovascular diseases Random Forest 1.7240 9.58 0.000000e+00 ARIMA
Nigeria Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Nigeria Diabetes Prophet 0.1408 2.06 0.000000e+00 ARIMA
Nigeria Diabetes Random Forest 0.0040 0.06 0.000000e+00 ARIMA
Nigeria Life expectancy ARIMA 0.7003 1.17 -1.846100e+00 Prophet
Nigeria Life expectancy Prophet 0.3693 0.58 2.086000e-01 Prophet
Nigeria Life expectancy Random Forest 6.4246 11.78 -2.385209e+02 Prophet
United States Cardiovascular diseases ARIMA 1.1904 1.29 0.000000e+00 ARIMA
United States Cardiovascular diseases Prophet 11.9749 12.93 0.000000e+00 ARIMA
United States Cardiovascular diseases Random Forest 4.3086 4.60 0.000000e+00 ARIMA
United States Diabetes ARIMA 0.0080 0.10 0.000000e+00 ARIMA
United States Diabetes Prophet 0.4896 6.65 0.000000e+00 ARIMA
United States Diabetes Random Forest 0.0081 0.06 0.000000e+00 ARIMA
United States Life expectancy ARIMA 1.9969 2.10 -1.796800e+00 Random Forest
United States Life expectancy Prophet 1.5614 1.63 -7.100000e-01 Random Forest
United States Life expectancy Random Forest 1.1020 1.20 1.482000e-01 Random Forest
# Evaluation metrics (RMSE, MAPE, R²)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
def calculate_metrics(actual, predicted):
    """Score a forecast against the observed values.

    Parameters
    ----------
    actual : array-like
        Observed values.
    predicted : array-like
        Forecast values, in the same order and of the same length as
        ``actual``.

    Returns
    -------
    tuple
        ``(rmse, mape, r2)`` rounded to 4, 2 and 4 decimals respectively.
        MAPE is expressed in percent and is averaged over the observations
        whose actual value is non-zero; it is NaN when every actual value
        is zero.

    Raises
    ------
    ValueError
        If the inputs are empty or do not have the same number of values.
    """
    # Work on flat float arrays: list inputs are accepted, and neither a
    # pandas index nor an (n, 1) column shape can change how the two inputs
    # pair up element by element.
    actual = np.asarray(actual, dtype=float).ravel()
    predicted = np.asarray(predicted, dtype=float).ravel()
    if actual.size == 0 or actual.shape != predicted.shape:
        raise ValueError(
            f"actual and predicted must be non-empty and equally long "
            f"(got {actual.size} and {predicted.size} values)"
        )

    rmse = np.sqrt(mean_squared_error(actual, predicted))
    # NOTE(review): with only a handful of nearly constant actual values R²
    # can become extremely negative; RMSE is the more reliable score then.
    r2 = r2_score(actual, predicted)

    # A percentage error is undefined where the actual value is zero, so
    # those observations are left out instead of producing inf/NaN.
    nonzero = actual != 0
    if nonzero.any():
        relative_error = (actual[nonzero] - predicted[nonzero]) / actual[nonzero]
        mape = np.mean(np.abs(relative_error)) * 100
    else:
        mape = np.nan

    return round(rmse, 4), round(mape, 2), round(r2, 4)
# One dict per (country, target, model) that could be fitted and scored.
metrics_summary = []

# Evaluation years (hold-out period; the models are trained on 1950-2020)
eval_years = [2021, 2022, 2023]

for country in selected_countries:
    # Sort chronologically: the ARIMA and Prophet forecasts come out in year
    # order and are compared to the hold-out values position by position.
    df_country = df_forecast_ready[df_forecast_ready['Country'] == country].sort_values('Year')

    # The same train/hold-out split is used by every target and model.
    train_mask = df_country['Year'].between(1950, 2020)
    eval_mask = df_country['Year'].isin(eval_years)

    for target in target_columns:
        if target not in df_country.columns:
            continue

        actual = df_country.loc[eval_mask, target].values
        if len(actual) == 0:
            # Nothing to score against, so no model can be evaluated.
            print(f"Skipping {country} / {target}: no observations for {eval_years}")
            continue

        # --- ARIMA ---
        try:
            train_series = df_country.loc[train_mask, [target]]
            # NOTE(review): the yearly index is synthesised from the row
            # count, which assumes one row per year starting in 1950 with no
            # gaps - confirm against the data preparation step.
            train_series.index = pd.date_range(start='1950', periods=len(train_series), freq='YE')
            model_arima = ARIMA(train_series, order=(1, 1, 1)).fit()
            # Forecast the len(eval_years) steps right after the training data.
            arima_pred = model_arima.predict(
                start=len(train_series),
                end=len(train_series) + len(eval_years) - 1,
            )
            arima_rmse, arima_mape, arima_r2 = calculate_metrics(actual, arima_pred)
            metrics_summary.append({
                "Country": country, "Target": target, "Model": "ARIMA",
                "RMSE": arima_rmse, "MAPE": arima_mape, "R²": arima_r2
            })
        except Exception as exc:
            # Best effort: report the failure and carry on with the next
            # model instead of hiding it (KeyboardInterrupt still propagates).
            print(f"ARIMA failed for {country} / {target}: {exc}")

        # --- Prophet ---
        try:
            prophet_df = df_country.loc[train_mask, ['Year', target]].rename(
                columns={'Year': 'ds', target: 'y'}
            )
            prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%Y')
            model_prophet = Prophet()
            model_prophet.fit(prophet_df)
            future_eval = pd.DataFrame({'ds': pd.to_datetime(eval_years, format='%Y')})
            prophet_pred = model_prophet.predict(future_eval)['yhat'].values
            prophet_rmse, prophet_mape, prophet_r2 = calculate_metrics(actual, prophet_pred)
            metrics_summary.append({
                "Country": country, "Target": target, "Model": "Prophet",
                "RMSE": prophet_rmse, "MAPE": prophet_mape, "R²": prophet_r2
            })
        except Exception as exc:
            print(f"Prophet failed for {country} / {target}: {exc}")

        # --- Random Forest ---
        try:
            features = selected_features_dict.get(target, [])
            available = [f for f in features if f in df_country.columns]
            if not available:
                raise ValueError("none of the selected features are available")
            X = df_country[available]
            y = df_country[target]
            X_train = X[train_mask]
            y_train = y[train_mask]
            X_eval = X[eval_mask]
            model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
            model_rf.fit(X_train, y_train)
            rf_pred = model_rf.predict(X_eval)
            rf_rmse, rf_mape, rf_r2 = calculate_metrics(actual, rf_pred)
            metrics_summary.append({
                "Country": country, "Target": target, "Model": "Random Forest",
                "RMSE": rf_rmse, "MAPE": rf_mape, "R²": rf_r2
            })
        except Exception as exc:
            print(f"Random Forest failed for {country} / {target}: {exc}")
def pick_best_model(group):
    """Return the ``Model`` value of the row with the smallest ``RMSE``.

    ``group`` is a DataFrame with at least the columns ``RMSE`` and
    ``Model``. If several rows share the minimum RMSE, the first of them
    in row order is chosen.
    """
    winner_label = group['RMSE'].idxmin()
    model_names = group['Model']
    return model_names.loc[winner_label]
# Convert to DataFrame. The columns are named explicitly so the frame has the
# expected layout even when no model could be evaluated.
metric_columns = ['Country', 'Target', 'Model', 'RMSE', 'MAPE', 'R²']
df_metrics = pd.DataFrame(metrics_summary, columns=metric_columns)

# Sort it and assign it to df_metrics_sorted
df_metrics_sorted = df_metrics.sort_values(['Country', 'Target', 'Model']).reset_index(drop=True)

# Assign Best_Model: the model with the lowest RMSE within each
# (Country, Target) group, repeated on every row of that group.
# pick_best_model is already defined earlier in the file, so it is reused
# here rather than being declared a second time.
group_keys = ['Country', 'Target']
if df_metrics_sorted.empty:
    df_metrics_sorted['Best_Model'] = pd.Series(dtype='object')
else:
    best_models = (
        df_metrics_sorted.groupby(group_keys)[['RMSE', 'Model']]
        .apply(pick_best_model)
        .rename('Best_Model')
    )
    # Joining on the group keys keeps the row order and index unchanged.
    df_metrics_sorted = df_metrics_sorted.join(best_models, on=group_keys)

# Display full table
print("\n🎯 Step 20: Evaluation Summary with Best Model\n")
print(df_metrics_sorted[['Country', 'Target', 'Model', 'RMSE', 'MAPE', 'R²', 'Best_Model']].to_string(index=False))
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/kig0c_vt.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_xe1a8zq.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=45485', 'data', 'file=/tmp/tmpfus6j9tk/kig0c_vt.json', 'init=/tmp/tmpfus6j9tk/_xe1a8zq.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelibybrgdm/prophet_model-20250714230244.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:44 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:45 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ij2novhx.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/02pf3_6l.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=10912', 'data', 'file=/tmp/tmpfus6j9tk/ij2novhx.json', 'init=/tmp/tmpfus6j9tk/02pf3_6l.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelehueu4_7/prophet_model-20250714230245.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:45 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:45 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. 
Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/v7hm3i5u.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/qj753fuo.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=74985', 'data', 'file=/tmp/tmpfus6j9tk/v7hm3i5u.json', 'init=/tmp/tmpfus6j9tk/qj753fuo.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model4vxx4ws0/prophet_model-20250714230246.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:46 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:46 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5vkkikae.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3g85gazt.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=70426', 'data', 'file=/tmp/tmpfus6j9tk/5vkkikae.json', 'init=/tmp/tmpfus6j9tk/3g85gazt.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelt9_sy5iv/prophet_model-20250714230247.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:47 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:48 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. 
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/zlxjijti.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/c5ablhl5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=81481', 'data', 'file=/tmp/tmpfus6j9tk/zlxjijti.json', 'init=/tmp/tmpfus6j9tk/c5ablhl5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelyzln2dm5/prophet_model-20250714230248.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:48 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:49 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ktmrrp5q.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/s23o9azw.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=79907', 'data', 'file=/tmp/tmpfus6j9tk/ktmrrp5q.json', 'init=/tmp/tmpfus6j9tk/s23o9azw.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelwc7tspdl/prophet_model-20250714230249.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:49 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:49 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. 
Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/q7vzj4lj.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ygrp4vjz.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=22179', 'data', 'file=/tmp/tmpfus6j9tk/q7vzj4lj.json', 'init=/tmp/tmpfus6j9tk/ygrp4vjz.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelmdali8uu/prophet_model-20250714230250.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:50 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:50 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ddof7qn2.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/xvju00zk.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=13501', 'data', 'file=/tmp/tmpfus6j9tk/ddof7qn2.json', 'init=/tmp/tmpfus6j9tk/xvju00zk.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeltg0kt4hf/prophet_model-20250714230252.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:52 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:53 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/u5b8y7k1.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/cohkvypk.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=8938', 'data', 'file=/tmp/tmpfus6j9tk/u5b8y7k1.json', 'init=/tmp/tmpfus6j9tk/cohkvypk.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelmzsu88uo/prophet_model-20250714230254.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:54 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:55 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2nk9_wu8.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5yszub2e.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=28297', 'data', 'file=/tmp/tmpfus6j9tk/2nk9_wu8.json', 'init=/tmp/tmpfus6j9tk/5yszub2e.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelhuwbjjy4/prophet_model-20250714230258.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:02:58 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:02:59 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/uekq9it_.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/z9fbb3w9.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=65184', 'data', 'file=/tmp/tmpfus6j9tk/uekq9it_.json', 'init=/tmp/tmpfus6j9tk/z9fbb3w9.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelss9hbsnn/prophet_model-20250714230301.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:01 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:02 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ngigbi76.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/8b9c1kpi.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=49113', 'data', 'file=/tmp/tmpfus6j9tk/ngigbi76.json', 'init=/tmp/tmpfus6j9tk/8b9c1kpi.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelwou_hscn/prophet_model-20250714230303.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:03 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:03 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/av4gacdt.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/mlwzpk8a.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=40756', 'data', 'file=/tmp/tmpfus6j9tk/av4gacdt.json', 'init=/tmp/tmpfus6j9tk/mlwzpk8a.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelavsi_18y/prophet_model-20250714230309.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:09 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:10 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/tb8rbpnv.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/8rh0lwdw.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=43628', 'data', 'file=/tmp/tmpfus6j9tk/tb8rbpnv.json', 'init=/tmp/tmpfus6j9tk/8rh0lwdw.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelfuzstolr/prophet_model-20250714230311.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:11 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:12 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/11dswh8g.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/jc3xq5qm.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=19399', 'data', 'file=/tmp/tmpfus6j9tk/11dswh8g.json', 'init=/tmp/tmpfus6j9tk/jc3xq5qm.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelwdpe38m9/prophet_model-20250714230312.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:12 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:13 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/9c1fgh6v.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/dptr2lw2.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=90487', 'data', 'file=/tmp/tmpfus6j9tk/9c1fgh6v.json', 'init=/tmp/tmpfus6j9tk/dptr2lw2.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model8o2t79q_/prophet_model-20250714230314.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:14 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:15 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/joeui61v.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/h4_p06d7.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=25984', 'data', 'file=/tmp/tmpfus6j9tk/joeui61v.json', 'init=/tmp/tmpfus6j9tk/h4_p06d7.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modell2j1y40g/prophet_model-20250714230316.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:16 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:17 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ep5upw4c.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2h33_c22.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=26292', 'data', 'file=/tmp/tmpfus6j9tk/ep5upw4c.json', 'init=/tmp/tmpfus6j9tk/2h33_c22.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model5bke4h2i/prophet_model-20250714230319.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:19 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:19 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/gwi0c_j_.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/l8h797bn.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23719', 'data', 'file=/tmp/tmpfus6j9tk/gwi0c_j_.json', 'init=/tmp/tmpfus6j9tk/l8h797bn.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model7q5bnopz/prophet_model-20250714230321.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:21 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:22 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ywvw0dxh.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/gz3j794w.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=1926', 'data', 'file=/tmp/tmpfus6j9tk/ywvw0dxh.json', 'init=/tmp/tmpfus6j9tk/gz3j794w.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelcjj94cup/prophet_model-20250714230323.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:23 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:23 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/xhevpvm1.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_hl1czod.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=78835', 'data', 'file=/tmp/tmpfus6j9tk/xhevpvm1.json', 'init=/tmp/tmpfus6j9tk/_hl1czod.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model42sd771r/prophet_model-20250714230324.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:24 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:25 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3q2qjr0k.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/cvwomqdr.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=90586', 'data', 'file=/tmp/tmpfus6j9tk/3q2qjr0k.json', 'init=/tmp/tmpfus6j9tk/cvwomqdr.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeldaunti3x/prophet_model-20250714230325.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:25 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:27 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/y1ovf8d6.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/jpnf47xo.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=33687', 'data', 'file=/tmp/tmpfus6j9tk/y1ovf8d6.json', 'init=/tmp/tmpfus6j9tk/jpnf47xo.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelg_mgq7bl/prophet_model-20250714230328.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:28 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:28 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/49vdbyx6.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/34cev01s.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=92160', 'data', 'file=/tmp/tmpfus6j9tk/49vdbyx6.json', 'init=/tmp/tmpfus6j9tk/34cev01s.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0g6kgkqh/prophet_model-20250714230329.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:29 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:30 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/z097gi8t.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/twgtjaow.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=31778', 'data', 'file=/tmp/tmpfus6j9tk/z097gi8t.json', 'init=/tmp/tmpfus6j9tk/twgtjaow.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelnbs5dk08/prophet_model-20250714230332.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:32 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:34 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/xf059fyj.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0osfj3hd.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=33776', 'data', 'file=/tmp/tmpfus6j9tk/xf059fyj.json', 'init=/tmp/tmpfus6j9tk/0osfj3hd.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model_342g0v1/prophet_model-20250714230335.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:35 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:36 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/tqrwwjt4.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/bp7lwhgm.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=16826', 'data', 'file=/tmp/tmpfus6j9tk/tqrwwjt4.json', 'init=/tmp/tmpfus6j9tk/bp7lwhgm.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelewkosqa4/prophet_model-20250714230337.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:37 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:37 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/r7xkpfml.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3a2y_rn8.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=22765', 'data', 'file=/tmp/tmpfus6j9tk/r7xkpfml.json', 'init=/tmp/tmpfus6j9tk/3a2y_rn8.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeldhyn9cmp/prophet_model-20250714230337.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:37 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:37 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fflanit7.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/uu_j66zx.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=46009', 'data', 'file=/tmp/tmpfus6j9tk/fflanit7.json', 'init=/tmp/tmpfus6j9tk/uu_j66zx.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelo1e9q4_5/prophet_model-20250714230338.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:38 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:38 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/n1wmddzh.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/4yd39zbc.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=70996', 'data', 'file=/tmp/tmpfus6j9tk/n1wmddzh.json', 'init=/tmp/tmpfus6j9tk/4yd39zbc.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeln67_d4em/prophet_model-20250714230338.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:03:38 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:03:39 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
🎯 Step 20: Evaluation Summary with Best Model
Country Target Model RMSE MAPE R² Best_Model
Bangladesh Cardiovascular diseases ARIMA 1.1756 4.02 -1.094927e+29 Random Forest
Bangladesh Cardiovascular diseases Prophet 6.9912 24.69 -3.872468e+30 Random Forest
Bangladesh Cardiovascular diseases Random Forest 0.4101 1.45 -1.332712e+28 Random Forest
Bangladesh Diabetes ARIMA 0.0000 0.00 0.000000e+00 ARIMA
Bangladesh Diabetes Prophet 2.9878 30.49 0.000000e+00 ARIMA
Bangladesh Diabetes Random Forest 0.1033 1.05 0.000000e+00 ARIMA
Bangladesh Life expectancy ARIMA 2.3127 2.76 -1.102500e+00 Prophet
Bangladesh Life expectancy Prophet 1.6767 1.89 -1.051000e-01 Prophet
Bangladesh Life expectancy Random Forest 3.5922 4.01 -4.072400e+00 Prophet
Brazil Cardiovascular diseases ARIMA 1.8195 4.66 0.000000e+00 ARIMA
Brazil Cardiovascular diseases Prophet 6.5472 16.73 0.000000e+00 ARIMA
Brazil Cardiovascular diseases Random Forest 2.3205 5.95 0.000000e+00 ARIMA
Brazil Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Brazil Diabetes Prophet 0.1860 2.14 0.000000e+00 ARIMA
Brazil Diabetes Random Forest 0.0327 0.35 0.000000e+00 ARIMA
Brazil Life expectancy ARIMA 3.0096 3.29 -5.672800e+00 Random Forest
Brazil Life expectancy Prophet 2.1896 2.66 -2.531900e+00 Random Forest
Brazil Life expectancy Random Forest 1.1742 1.25 -1.570000e-02 Random Forest
Germany Cardiovascular diseases ARIMA 0.4339 1.23 0.000000e+00 ARIMA
Germany Cardiovascular diseases Prophet 2.1255 5.82 0.000000e+00 ARIMA
Germany Cardiovascular diseases Random Forest 0.8296 2.34 0.000000e+00 ARIMA
Germany Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Germany Diabetes Prophet 2.7582 55.13 0.000000e+00 ARIMA
Germany Diabetes Random Forest 0.0000 0.00 1.000000e+00 ARIMA
Germany Life expectancy ARIMA 0.4746 0.44 -1.051900e+00 Random Forest
Germany Life expectancy Prophet 0.6124 0.65 -2.417000e+00 Random Forest
Germany Life expectancy Random Forest 0.3493 0.41 -1.118000e-01 Random Forest
India Cardiovascular diseases ARIMA 19.6630 6.68 0.000000e+00 Random Forest
India Cardiovascular diseases Prophet 37.4210 12.75 0.000000e+00 Random Forest
India Cardiovascular diseases Random Forest 14.6042 4.52 0.000000e+00 Random Forest
India Diabetes ARIMA 0.0197 0.21 0.000000e+00 ARIMA
India Diabetes Prophet 0.8306 9.49 0.000000e+00 ARIMA
India Diabetes Random Forest 0.0350 0.40 0.000000e+00 ARIMA
India Life expectancy ARIMA 1.9737 2.25 1.628000e-01 ARIMA
India Life expectancy Prophet 2.4758 2.42 -3.173000e-01 ARIMA
India Life expectancy Random Forest 2.7723 3.85 -6.518000e-01 ARIMA
Indonesia Cardiovascular diseases ARIMA 8.4866 11.75 0.000000e+00 Random Forest
Indonesia Cardiovascular diseases Prophet 7.9981 9.90 0.000000e+00 Random Forest
Indonesia Cardiovascular diseases Random Forest 7.3668 10.19 0.000000e+00 Random Forest
Indonesia Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Indonesia Diabetes Prophet 0.7121 9.24 0.000000e+00 ARIMA
Indonesia Diabetes Random Forest 0.0090 0.12 0.000000e+00 ARIMA
Indonesia Life expectancy ARIMA 1.8872 2.68 -2.444000e-01 Prophet
Indonesia Life expectancy Prophet 1.6929 1.48 -1.400000e-03 Prophet
Indonesia Life expectancy Random Forest 1.7905 2.56 -1.202000e-01 Prophet
Japan Cardiovascular diseases ARIMA 1.5477 3.73 0.000000e+00 ARIMA
Japan Cardiovascular diseases Prophet 7.6884 18.56 0.000000e+00 ARIMA
Japan Cardiovascular diseases Random Forest 2.5709 6.18 0.000000e+00 ARIMA
Japan Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Japan Diabetes Prophet 1.8411 27.47 0.000000e+00 ARIMA
Japan Diabetes Random Forest 0.0000 0.00 0.000000e+00 ARIMA
Japan Life expectancy ARIMA 0.6387 0.68 -4.204900e+00 Random Forest
Japan Life expectancy Prophet 0.5765 0.59 -3.239500e+00 Random Forest
Japan Life expectancy Random Forest 0.2755 0.30 3.170000e-02 Random Forest
Kenya Cardiovascular diseases ARIMA 0.1218 3.48 -7.516462e+28 ARIMA
Kenya Cardiovascular diseases Prophet 0.9335 26.66 -4.418335e+30 ARIMA
Kenya Cardiovascular diseases Random Forest 0.3225 9.22 -5.274808e+29 ARIMA
Kenya Diabetes ARIMA 0.0004 0.01 0.000000e+00 ARIMA
Kenya Diabetes Prophet 3.4797 57.98 0.000000e+00 ARIMA
Kenya Diabetes Random Forest 0.0070 0.12 0.000000e+00 ARIMA
Kenya Life expectancy ARIMA 3.2353 4.35 -7.360000e+00 Random Forest
Kenya Life expectancy Prophet 1.6706 2.25 -1.228900e+00 Random Forest
Kenya Life expectancy Random Forest 0.8421 1.34 4.337000e-01 Random Forest
Mexico Cardiovascular diseases ARIMA 0.5788 2.17 -2.654270e+28 ARIMA
Mexico Cardiovascular diseases Prophet 0.8437 3.08 -5.639601e+28 ARIMA
Mexico Cardiovascular diseases Random Forest 2.4137 10.91 -4.615694e+29 ARIMA
Mexico Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Mexico Diabetes Prophet 0.7997 7.13 -2.026747e+29 ARIMA
Mexico Diabetes Random Forest 0.0272 0.15 -2.341984e+26 ARIMA
Mexico Life expectancy ARIMA 6.2245 7.05 -6.367500e+00 Prophet
Mexico Life expectancy Prophet 2.4286 2.54 -1.216000e-01 Prophet
Mexico Life expectancy Random Forest 2.5185 3.37 -2.061000e-01 Prophet
Nigeria Cardiovascular diseases ARIMA 0.7164 3.98 0.000000e+00 ARIMA
Nigeria Cardiovascular diseases Prophet 4.4984 24.97 0.000000e+00 ARIMA
Nigeria Cardiovascular diseases Random Forest 1.7240 9.58 0.000000e+00 ARIMA
Nigeria Diabetes ARIMA 0.0000 0.00 1.000000e+00 ARIMA
Nigeria Diabetes Prophet 0.1408 2.06 0.000000e+00 ARIMA
Nigeria Diabetes Random Forest 0.0040 0.06 0.000000e+00 ARIMA
Nigeria Life expectancy ARIMA 0.7003 1.17 -1.846100e+00 Prophet
Nigeria Life expectancy Prophet 0.3693 0.58 2.086000e-01 Prophet
Nigeria Life expectancy Random Forest 6.4246 11.78 -2.385209e+02 Prophet
United States Cardiovascular diseases ARIMA 1.1904 1.29 0.000000e+00 ARIMA
United States Cardiovascular diseases Prophet 11.9749 12.93 0.000000e+00 ARIMA
United States Cardiovascular diseases Random Forest 4.3086 4.60 0.000000e+00 ARIMA
United States Diabetes ARIMA 0.0080 0.10 0.000000e+00 ARIMA
United States Diabetes Prophet 0.4896 6.65 0.000000e+00 ARIMA
United States Diabetes Random Forest 0.0081 0.06 0.000000e+00 ARIMA
United States Life expectancy ARIMA 1.9969 2.10 -1.796800e+00 Random Forest
United States Life expectancy Prophet 1.5614 1.63 -7.100000e-01 Random Forest
United States Life expectancy Random Forest 1.1020 1.20 1.482000e-01 Random Forest
Based on the results in the Metric Comparison Table, Random Forest is the best method overall. Therefore, it will be used for forecasting in this project.
# Forecast between 2024 - 2074 using Random Forest
# Build the final forecast table from the Random Forest column only:
# keep the identifying columns, expose the prediction under the generic
# 'Forecast' name, and tag every row with the model that produced it.
df_final_forecast_rf = (
    df_model_comparison[['Country', 'Target', 'Year', 'RF_Forecast']]
    .rename(columns={'RF_Forecast': 'Forecast'})
    .assign(Model='Random Forest')
)

# Milestone years shown in the preview; rows are kept only when their
# 'Year' value is one of these.
selected_years = [2025, 2030, 2040, 2050, 2060, 2074]
df_preview_rf = (
    df_final_forecast_rf.loc[df_final_forecast_rf['Year'].isin(selected_years)]
    .sort_values(['Country', 'Target', 'Year'])
    .reset_index(drop=True)
)

print("\nFinal Random Forest Forecast (Selected Years):")
print(df_preview_rf.to_string(index=False))
Final Random Forest Forecast (Selected Years):
Country Target Year Forecast Model
Bangladesh Cardiovascular diseases 2025 27.854558 Random Forest
Bangladesh Cardiovascular diseases 2030 27.854558 Random Forest
Bangladesh Cardiovascular diseases 2040 27.854558 Random Forest
Bangladesh Cardiovascular diseases 2050 27.854558 Random Forest
Bangladesh Cardiovascular diseases 2060 27.854558 Random Forest
Bangladesh Diabetes 2025 9.689000 Random Forest
Bangladesh Diabetes 2030 9.689000 Random Forest
Bangladesh Diabetes 2040 9.689000 Random Forest
Bangladesh Diabetes 2050 9.689000 Random Forest
Bangladesh Diabetes 2060 9.689000 Random Forest
Bangladesh Life expectancy 2025 69.439186 Random Forest
Bangladesh Life expectancy 2030 69.439186 Random Forest
Bangladesh Life expectancy 2040 69.439186 Random Forest
Bangladesh Life expectancy 2050 69.439186 Random Forest
Bangladesh Life expectancy 2060 69.439186 Random Forest
Brazil Cardiovascular diseases 2025 36.784485 Random Forest
Brazil Cardiovascular diseases 2030 36.784485 Random Forest
Brazil Cardiovascular diseases 2040 36.784485 Random Forest
Brazil Cardiovascular diseases 2050 36.784485 Random Forest
Brazil Cardiovascular diseases 2060 36.784485 Random Forest
Brazil Diabetes 2025 8.290000 Random Forest
Brazil Diabetes 2030 8.290000 Random Forest
Brazil Diabetes 2040 8.290000 Random Forest
Brazil Diabetes 2050 8.290000 Random Forest
Brazil Diabetes 2060 8.290000 Random Forest
Brazil Life expectancy 2025 75.387073 Random Forest
Brazil Life expectancy 2030 75.387073 Random Forest
Brazil Life expectancy 2040 75.387073 Random Forest
Brazil Life expectancy 2050 75.387073 Random Forest
Brazil Life expectancy 2060 75.387073 Random Forest
Germany Cardiovascular diseases 2025 34.418009 Random Forest
Germany Cardiovascular diseases 2030 34.418009 Random Forest
Germany Cardiovascular diseases 2040 34.418009 Random Forest
Germany Cardiovascular diseases 2050 34.418009 Random Forest
Germany Cardiovascular diseases 2060 34.418009 Random Forest
Germany Diabetes 2025 5.000000 Random Forest
Germany Diabetes 2030 5.000000 Random Forest
Germany Diabetes 2040 5.000000 Random Forest
Germany Diabetes 2050 5.000000 Random Forest
Germany Diabetes 2060 5.000000 Random Forest
Germany Life expectancy 2025 80.959762 Random Forest
Germany Life expectancy 2030 80.959762 Random Forest
Germany Life expectancy 2040 80.959762 Random Forest
Germany Life expectancy 2050 80.959762 Random Forest
Germany Life expectancy 2060 80.959762 Random Forest
India Cardiovascular diseases 2025 268.498109 Random Forest
India Cardiovascular diseases 2030 268.498109 Random Forest
India Cardiovascular diseases 2040 268.498109 Random Forest
India Cardiovascular diseases 2050 268.498109 Random Forest
India Cardiovascular diseases 2060 268.498109 Random Forest
India Diabetes 2025 8.665000 Random Forest
India Diabetes 2030 8.665000 Random Forest
India Diabetes 2040 8.665000 Random Forest
India Diabetes 2050 8.665000 Random Forest
India Diabetes 2060 8.665000 Random Forest
India Life expectancy 2025 68.648905 Random Forest
India Life expectancy 2030 68.648905 Random Forest
India Life expectancy 2040 68.648905 Random Forest
India Life expectancy 2050 68.648905 Random Forest
India Life expectancy 2060 68.648905 Random Forest
Indonesia Cardiovascular diseases 2025 79.164923 Random Forest
Indonesia Cardiovascular diseases 2030 79.164923 Random Forest
Indonesia Cardiovascular diseases 2040 79.164923 Random Forest
Indonesia Cardiovascular diseases 2050 79.164923 Random Forest
Indonesia Cardiovascular diseases 2060 79.164923 Random Forest
Indonesia Diabetes 2025 7.709000 Random Forest
Indonesia Diabetes 2030 7.709000 Random Forest
Indonesia Diabetes 2040 7.709000 Random Forest
Indonesia Diabetes 2050 7.709000 Random Forest
Indonesia Diabetes 2060 7.709000 Random Forest
Indonesia Life expectancy 2025 69.320874 Random Forest
Indonesia Life expectancy 2030 69.320874 Random Forest
Indonesia Life expectancy 2040 69.320874 Random Forest
Indonesia Life expectancy 2050 69.320874 Random Forest
Indonesia Life expectancy 2060 69.320874 Random Forest
Japan Cardiovascular diseases 2025 39.015181 Random Forest
Japan Cardiovascular diseases 2030 39.015181 Random Forest
Japan Cardiovascular diseases 2040 39.015181 Random Forest
Japan Cardiovascular diseases 2050 39.015181 Random Forest
Japan Cardiovascular diseases 2060 39.015181 Random Forest
Japan Diabetes 2025 6.700000 Random Forest
Japan Diabetes 2030 6.700000 Random Forest
Japan Diabetes 2040 6.700000 Random Forest
Japan Diabetes 2050 6.700000 Random Forest
Japan Diabetes 2060 6.700000 Random Forest
Japan Life expectancy 2025 84.421190 Random Forest
Japan Life expectancy 2030 84.421190 Random Forest
Japan Life expectancy 2040 84.421190 Random Forest
Japan Life expectancy 2050 84.421190 Random Forest
Japan Life expectancy 2060 84.421190 Random Forest
Kenya Cardiovascular diseases 2025 3.819878 Random Forest
Kenya Cardiovascular diseases 2030 3.819878 Random Forest
Kenya Cardiovascular diseases 2040 3.819878 Random Forest
Kenya Cardiovascular diseases 2050 3.819878 Random Forest
Kenya Cardiovascular diseases 2060 3.819878 Random Forest
Kenya Diabetes 2025 5.993000 Random Forest
Kenya Diabetes 2030 5.993000 Random Forest
Kenya Diabetes 2040 5.993000 Random Forest
Kenya Diabetes 2050 5.993000 Random Forest
Kenya Diabetes 2060 5.993000 Random Forest
Kenya Life expectancy 2025 62.775816 Random Forest
Kenya Life expectancy 2030 62.775816 Random Forest
Kenya Life expectancy 2040 62.775816 Random Forest
Kenya Life expectancy 2050 62.775816 Random Forest
Kenya Life expectancy 2060 62.775816 Random Forest
Mexico Cardiovascular diseases 2025 19.754402 Random Forest
Mexico Cardiovascular diseases 2030 19.754402 Random Forest
Mexico Cardiovascular diseases 2040 19.754402 Random Forest
Mexico Cardiovascular diseases 2050 19.754402 Random Forest
Mexico Cardiovascular diseases 2060 19.754402 Random Forest
Mexico Diabetes 2025 11.153000 Random Forest
Mexico Diabetes 2030 11.153000 Random Forest
Mexico Diabetes 2040 11.153000 Random Forest
Mexico Diabetes 2050 11.153000 Random Forest
Mexico Diabetes 2060 11.153000 Random Forest
Mexico Life expectancy 2025 71.889574 Random Forest
Mexico Life expectancy 2030 71.889574 Random Forest
Mexico Life expectancy 2040 71.889574 Random Forest
Mexico Life expectancy 2050 71.889574 Random Forest
Mexico Life expectancy 2060 71.889574 Random Forest
Nigeria Cardiovascular diseases 2025 16.267567 Random Forest
Nigeria Cardiovascular diseases 2030 16.267567 Random Forest
Nigeria Cardiovascular diseases 2040 16.267567 Random Forest
Nigeria Cardiovascular diseases 2050 16.267567 Random Forest
Nigeria Cardiovascular diseases 2060 16.267567 Random Forest
Nigeria Diabetes 2025 6.196000 Random Forest
Nigeria Diabetes 2030 6.196000 Random Forest
Nigeria Diabetes 2040 6.196000 Random Forest
Nigeria Diabetes 2050 6.196000 Random Forest
Nigeria Diabetes 2060 6.196000 Random Forest
Nigeria Life expectancy 2025 47.203708 Random Forest
Nigeria Life expectancy 2030 47.203708 Random Forest
Nigeria Life expectancy 2040 47.203708 Random Forest
Nigeria Life expectancy 2050 47.203708 Random Forest
Nigeria Life expectancy 2060 47.203708 Random Forest
United States Cardiovascular diseases 2025 86.713075 Random Forest
United States Cardiovascular diseases 2030 86.713075 Random Forest
United States Cardiovascular diseases 2040 86.713075 Random Forest
United States Cardiovascular diseases 2050 86.713075 Random Forest
United States Cardiovascular diseases 2060 86.713075 Random Forest
United States Diabetes 2025 7.300000 Random Forest
United States Diabetes 2030 7.300000 Random Forest
United States Diabetes 2040 7.300000 Random Forest
United States Diabetes 2050 7.300000 Random Forest
United States Diabetes 2060 7.300000 Random Forest
United States Life expectancy 2025 78.070789 Random Forest
United States Life expectancy 2030 78.070789 Random Forest
United States Life expectancy 2040 78.070789 Random Forest
United States Life expectancy 2050 78.070789 Random Forest
United States Life expectancy 2060 78.070789 Random Forest
# Final modeling 2024-2074 - ARIMA, Prophet, Random Forest comparison
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.ensemble import RandomForestRegressor
# Forecast horizon. The training window is every year before the first
# forecast year, so the two can never drift apart.
forecast_years = list(range(2024, 2075))
last_train_year = forecast_years[0] - 1
final_forecasts = []

for country in selected_countries:
    df_country = df_forecast_ready[df_forecast_ready['Country'] == country]

    # The training rows do not depend on the target, so build them once per
    # country. Sort chronologically: ARIMA treats row order as time order.
    train_df = (
        df_country[df_country['Year'] <= last_train_year]
        .sort_values('Year')
        .copy()
    )

    for target in target_columns:
        if target not in df_country.columns:
            continue

        ### --- ARIMA ---
        # Univariate model on the target series indexed by year-start dates.
        try:
            ts = train_df.set_index(
                pd.to_datetime(train_df['Year'], format='%Y')
            )[target].astype(float)
            model_arima = ARIMA(ts, order=(1, 1, 1)).fit()
            arima_forecast = model_arima.predict(
                start=str(forecast_years[0]), end=str(forecast_years[-1])
            )
            arima_forecast = arima_forecast.reset_index()
            arima_forecast.columns = ['Year', 'Forecast']
            arima_forecast['Year'] = arima_forecast['Year'].dt.year
            arima_forecast['Country'] = country
            arima_forecast['Target'] = target
            arima_forecast['Model'] = 'ARIMA'
        except Exception as e:
            # Best effort: one failing model must not abort the whole run.
            print(f"ARIMA failed for {country} {target}: {e}")
            arima_forecast = pd.DataFrame()

        ### --- Prophet ---
        # Prophet expects a frame with a datetime 'ds' column and a 'y' column.
        try:
            prophet_df = train_df[['Year', target]].rename(columns={'Year': 'ds', target: 'y'})
            prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%Y')
            model_prophet = Prophet()
            model_prophet.fit(prophet_df)
            future_df = pd.DataFrame({'ds': pd.to_datetime(forecast_years, format='%Y')})
            prophet_forecast = model_prophet.predict(future_df)
            prophet_forecast = prophet_forecast[['ds', 'yhat']].rename(
                columns={'ds': 'Year', 'yhat': 'Forecast'}
            )
            prophet_forecast['Year'] = prophet_forecast['Year'].dt.year
            prophet_forecast['Country'] = country
            prophet_forecast['Target'] = target
            prophet_forecast['Model'] = 'Prophet'
        except Exception as e:
            print(f"Prophet failed for {country} {target}: {e}")
            prophet_forecast = pd.DataFrame()

        ### --- Random Forest ---
        # Feature-based model: trained on the historical rows and applied to
        # the feature rows that df_country holds for the forecast years.
        try:
            features = selected_features_dict.get(target, [])
            features_available = [f for f in features if f in df_country.columns]

            # Ensure we have valid features to train on
            if not features_available:
                raise ValueError("No matching features found for Random Forest training.")

            # Keep only rows where every feature AND the target are present.
            # Dropping on the features alone would let a NaN target reach
            # fit() and get the model skipped by the except below.
            complete_rows = (
                train_df[features_available].notna().all(axis=1)
                & train_df[target].notna()
            )
            X_train = train_df.loc[complete_rows, features_available]
            y_train = train_df.loc[complete_rows, target]

            if X_train.empty:
                raise ValueError("Training features for Random Forest are empty after dropping NaNs.")

            # Prepare features for forecast years, in chronological order so
            # the forward-fill below carries values from earlier to later years.
            forecast_df = df_country[df_country['Year'].isin(forecast_years)].sort_values('Year')
            X_forecast = forecast_df[features_available]

            # Check emptiness before attempting any NaN handling.
            if X_forecast.empty:
                raise ValueError("Forecast data is empty or incomplete for Random Forest.")

            # Handle missing forecast data
            if X_forecast.isnull().any().any():
                print(f"Random Forest forecast features contain NaNs for {country} {target}. Filling with forward-fill.")
                # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
                X_forecast = X_forecast.ffill()

            # Fit and predict
            model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
            model_rf.fit(X_train, y_train)
            rf_forecast_values = model_rf.predict(X_forecast)

            rf_forecast = pd.DataFrame({
                'Year': forecast_df['Year'].values,
                'Forecast': rf_forecast_values,
                'Country': country,
                'Target': target,
                'Model': 'Random Forest'
            })
        except Exception as e:
            print(f"Random Forest failed for {country} {target}: {e}")
            rf_forecast = pd.DataFrame()

        # Combine forecasts (skip if empty)
        for df_model in (arima_forecast, prophet_forecast, rf_forecast):
            if not df_model.empty:
                final_forecasts.append(df_model)

# Combine all forecasts into one dataframe. pd.concat raises on an empty
# list, so fall back to an empty frame with the expected columns when every
# model failed for every country/target.
if final_forecasts:
    df_final_forecast_all = pd.concat(final_forecasts, ignore_index=True)
else:
    df_final_forecast_all = pd.DataFrame(
        columns=['Year', 'Forecast', 'Country', 'Target', 'Model']
    )

# Preview example forecast for some years & countries
selected_years = [2025, 2030, 2040, 2050, 2060, 2074]
df_preview = df_final_forecast_all[df_final_forecast_all['Year'].isin(selected_years)]
df_preview = df_preview.sort_values(['Country', 'Target', 'Model', 'Year']).reset_index(drop=True)

print("\nFinal Forecast Comparison (Selected Years):")
print(df_preview.to_string(index=False))
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5n8q16fp.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/qmfjbwhj.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=39209', 'data', 'file=/tmp/tmpfus6j9tk/5n8q16fp.json', 'init=/tmp/tmpfus6j9tk/qmfjbwhj.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model_f46j_tc/prophet_model-20250714233830.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:30 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:32 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/6xcnrj7r.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/1aiy6i8f.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=89585', 'data', 'file=/tmp/tmpfus6j9tk/6xcnrj7r.json', 'init=/tmp/tmpfus6j9tk/1aiy6i8f.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeldlcui7uj/prophet_model-20250714233834.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:34 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:34 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. 
Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/b5val6hm.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/q0o6w13x.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=89070', 'data', 'file=/tmp/tmpfus6j9tk/b5val6hm.json', 'init=/tmp/tmpfus6j9tk/q0o6w13x.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeliwcnbrcd/prophet_model-20250714233836.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:36 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:38 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/a_hqmyjb.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0wrgtol7.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=26990', 'data', 'file=/tmp/tmpfus6j9tk/a_hqmyjb.json', 'init=/tmp/tmpfus6j9tk/0wrgtol7.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelogyse9d5/prophet_model-20250714233839.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:39 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:39 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. 
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ykzf3c2e.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/kxuhsyll.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=65122', 'data', 'file=/tmp/tmpfus6j9tk/ykzf3c2e.json', 'init=/tmp/tmpfus6j9tk/kxuhsyll.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0p1dbrns/prophet_model-20250714233841.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:41 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:42 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/gz3j2ktb.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ou5tnmaz.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=54943', 'data', 'file=/tmp/tmpfus6j9tk/gz3j2ktb.json', 'init=/tmp/tmpfus6j9tk/ou5tnmaz.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelc5xxu2a7/prophet_model-20250714233843.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:43 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:44 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. 
Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/b40h8i5o.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/wh5jr2u8.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=55415', 'data', 'file=/tmp/tmpfus6j9tk/b40h8i5o.json', 'init=/tmp/tmpfus6j9tk/wh5jr2u8.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelsf86cak1/prophet_model-20250714233847.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:47 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:49 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/n0hmzrts.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/y1783z0z.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=82268', 'data', 'file=/tmp/tmpfus6j9tk/n0hmzrts.json', 'init=/tmp/tmpfus6j9tk/y1783z0z.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model8i_9trqw/prophet_model-20250714233851.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:51 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:52 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/yhi4rpe3.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/f0lbre56.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=34495', 'data', 'file=/tmp/tmpfus6j9tk/yhi4rpe3.json', 'init=/tmp/tmpfus6j9tk/f0lbre56.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model_tuout2x/prophet_model-20250714233853.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:53 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:54 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/o1r8h51d.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/l2gjupt5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=11934', 'data', 'file=/tmp/tmpfus6j9tk/o1r8h51d.json', 'init=/tmp/tmpfus6j9tk/l2gjupt5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelwi1simzo/prophet_model-20250714233856.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:38:56 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:38:59 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/pevft58s.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/g6uh_lx1.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=17934', 'data', 'file=/tmp/tmpfus6j9tk/pevft58s.json', 'init=/tmp/tmpfus6j9tk/g6uh_lx1.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelqlr1qhrp/prophet_model-20250714233901.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:01 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:01 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/dj_hg3az.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/uyikf6hp.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=15954', 'data', 'file=/tmp/tmpfus6j9tk/dj_hg3az.json', 'init=/tmp/tmpfus6j9tk/uyikf6hp.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelehbbmtka/prophet_model-20250714233901.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:01 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:02 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/8i06xft9.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_qsftnfl.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=4958', 'data', 'file=/tmp/tmpfus6j9tk/8i06xft9.json', 'init=/tmp/tmpfus6j9tk/_qsftnfl.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model79jsm7pz/prophet_model-20250714233904.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:04 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:06 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/c82e3p_y.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/9n10c11y.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=75266', 'data', 'file=/tmp/tmpfus6j9tk/c82e3p_y.json', 'init=/tmp/tmpfus6j9tk/9n10c11y.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model3jm8kpt0/prophet_model-20250714233907.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:07 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:08 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/cpyvlnwy.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2f7j1bx1.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=223', 'data', 'file=/tmp/tmpfus6j9tk/cpyvlnwy.json', 'init=/tmp/tmpfus6j9tk/2f7j1bx1.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelxlu27zd0/prophet_model-20250714233908.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:08 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:09 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/yjyfy1h6.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/c7q56pik.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=9284', 'data', 'file=/tmp/tmpfus6j9tk/yjyfy1h6.json', 'init=/tmp/tmpfus6j9tk/c7q56pik.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model4fsps8q9/prophet_model-20250714233910.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:10 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:10 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ur7qvijj.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/e4vzu7zt.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=55177', 'data', 'file=/tmp/tmpfus6j9tk/ur7qvijj.json', 'init=/tmp/tmpfus6j9tk/e4vzu7zt.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modely1mdt2jr/prophet_model-20250714233913.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:13 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:14 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/8koewj35.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/zpw6bhlf.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=59250', 'data', 'file=/tmp/tmpfus6j9tk/8koewj35.json', 'init=/tmp/tmpfus6j9tk/zpw6bhlf.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelqgybt1u4/prophet_model-20250714233915.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:15 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:15 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/tiaknkss.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ho1ccs47.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=3917', 'data', 'file=/tmp/tmpfus6j9tk/tiaknkss.json', 'init=/tmp/tmpfus6j9tk/ho1ccs47.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelq6eunbir/prophet_model-20250714233916.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:16 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:17 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/gipf4goa.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lsg2800l.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=88776', 'data', 'file=/tmp/tmpfus6j9tk/gipf4goa.json', 'init=/tmp/tmpfus6j9tk/lsg2800l.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0yhkqmi4/prophet_model-20250714233918.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:19 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:20 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7fjqa3vs.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/iuyubswy.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=94754', 'data', 'file=/tmp/tmpfus6j9tk/7fjqa3vs.json', 'init=/tmp/tmpfus6j9tk/iuyubswy.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelgav1nv3g/prophet_model-20250714233920.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:20 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:21 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/b9ueydg1.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ryrnsq1g.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=85161', 'data', 'file=/tmp/tmpfus6j9tk/b9ueydg1.json', 'init=/tmp/tmpfus6j9tk/ryrnsq1g.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelru23is34/prophet_model-20250714233922.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:22 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:23 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/90c0znxj.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ww609hk5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=64894', 'data', 'file=/tmp/tmpfus6j9tk/90c0znxj.json', 'init=/tmp/tmpfus6j9tk/ww609hk5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model635rl_o9/prophet_model-20250714233925.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:25 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:26 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ixdj9zug.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/a2u0k4hd.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=63878', 'data', 'file=/tmp/tmpfus6j9tk/ixdj9zug.json', 'init=/tmp/tmpfus6j9tk/a2u0k4hd.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model_7zfsu6q/prophet_model-20250714233927.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:27 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:28 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/zyt9f1k_.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fte_fm2g.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=19702', 'data', 'file=/tmp/tmpfus6j9tk/zyt9f1k_.json', 'init=/tmp/tmpfus6j9tk/fte_fm2g.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelmutmzng6/prophet_model-20250714233930.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:30 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:30 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/m6dm2loz.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/wmdjx2yx.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=15060', 'data', 'file=/tmp/tmpfus6j9tk/m6dm2loz.json', 'init=/tmp/tmpfus6j9tk/wmdjx2yx.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelllgfeg9d/prophet_model-20250714233931.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:31 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:31 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/baz7osuv.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/x04qz0m5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=35642', 'data', 'file=/tmp/tmpfus6j9tk/baz7osuv.json', 'init=/tmp/tmpfus6j9tk/x04qz0m5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model_8o36q9y/prophet_model-20250714233932.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:32 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:33 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/h7bc3skw.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2p9nv3hc.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=89283', 'data', 'file=/tmp/tmpfus6j9tk/h7bc3skw.json', 'init=/tmp/tmpfus6j9tk/2p9nv3hc.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelucrnyuka/prophet_model-20250714233934.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:34 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:34 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ciq05nkd.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/jlnje66o.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=29828', 'data', 'file=/tmp/tmpfus6j9tk/ciq05nkd.json', 'init=/tmp/tmpfus6j9tk/jlnje66o.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model03d5egl9/prophet_model-20250714233936.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:36 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:36 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/erxgfbxq.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ih6_bpoh.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=2987', 'data', 'file=/tmp/tmpfus6j9tk/erxgfbxq.json', 'init=/tmp/tmpfus6j9tk/ih6_bpoh.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model32_agkcb/prophet_model-20250714233937.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 23:39:37 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 23:39:37 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Final Forecast Comparison (Selected Years): Year Forecast Country Target Model 2025 28.763815 Bangladesh Cardiovascular diseases ARIMA 2030 29.828125 Bangladesh Cardiovascular diseases ARIMA 2040 31.345929 Bangladesh Cardiovascular diseases ARIMA 2050 32.304351 Bangladesh Cardiovascular diseases ARIMA 2060 32.909549 Bangladesh Cardiovascular diseases ARIMA 2074 33.401687 Bangladesh Cardiovascular diseases ARIMA 2025 23.627725 Bangladesh Cardiovascular diseases Prophet 2030 25.751730 Bangladesh Cardiovascular diseases Prophet 2040 30.023898 Bangladesh Cardiovascular diseases Prophet 2050 34.146654 Bangladesh Cardiovascular diseases Prophet 2060 38.418822 Bangladesh Cardiovascular diseases Prophet 2074 44.220563 Bangladesh Cardiovascular diseases Prophet 2025 28.217349 Bangladesh Cardiovascular diseases Random Forest 2030 28.217349 Bangladesh Cardiovascular diseases Random Forest 2040 28.217349 Bangladesh Cardiovascular diseases Random Forest 2050 28.217349 Bangladesh Cardiovascular diseases Random Forest 2060 28.217349 Bangladesh Cardiovascular diseases Random Forest 2025 9.799995 Bangladesh Diabetes ARIMA 2030 9.799988 Bangladesh Diabetes ARIMA 2040 9.799989 Bangladesh Diabetes ARIMA 2050 9.799989 Bangladesh Diabetes ARIMA 2060 9.799989 Bangladesh Diabetes ARIMA 2074 9.799989 Bangladesh Diabetes ARIMA 2025 7.136394 Bangladesh Diabetes Prophet 2030 6.915001 Bangladesh Diabetes Prophet 2040 6.327117 Bangladesh Diabetes Prophet 2050 5.946488 Bangladesh Diabetes Prophet 2060 5.358604 Bangladesh Diabetes Prophet 2074 4.784272 Bangladesh Diabetes Prophet 2025 9.800000 Bangladesh Diabetes Random Forest 2030 9.800000 Bangladesh Diabetes Random Forest 2040 9.800000 Bangladesh Diabetes Random Forest 2050 9.800000 Bangladesh Diabetes Random Forest 2060 9.800000 Bangladesh Diabetes Random Forest 2025 73.668822 Bangladesh Life expectancy ARIMA 2030 73.645639 Bangladesh Life expectancy ARIMA 2040 73.645637 Bangladesh Life expectancy ARIMA 2050 73.645637 Bangladesh Life 
expectancy ARIMA 2060 73.645637 Bangladesh Life expectancy ARIMA 2074 73.645637 Bangladesh Life expectancy ARIMA 2025 75.753919 Bangladesh Life expectancy Prophet 2030 77.754694 Bangladesh Life expectancy Prophet 2040 83.554658 Bangladesh Life expectancy Prophet 2050 88.458961 Bangladesh Life expectancy Prophet 2060 94.258925 Bangladesh Life expectancy Prophet 2074 101.304081 Bangladesh Life expectancy Prophet 2025 73.248996 Bangladesh Life expectancy Random Forest 2030 73.248996 Bangladesh Life expectancy Random Forest 2040 73.248996 Bangladesh Life expectancy Random Forest 2050 73.248996 Bangladesh Life expectancy Random Forest 2060 73.248996 Bangladesh Life expectancy Random Forest 2025 39.333730 Brazil Cardiovascular diseases ARIMA 2030 40.191538 Brazil Cardiovascular diseases ARIMA 2040 41.515891 Brazil Cardiovascular diseases ARIMA 2050 42.448928 Brazil Cardiovascular diseases ARIMA 2060 43.106273 Brazil Cardiovascular diseases ARIMA 2074 43.713735 Brazil Cardiovascular diseases ARIMA 2025 35.273665 Brazil Cardiovascular diseases Prophet 2030 38.452606 Brazil Cardiovascular diseases Prophet 2040 44.965198 Brazil Cardiovascular diseases Prophet 2050 50.925486 Brazil Cardiovascular diseases Prophet 2060 57.438079 Brazil Cardiovascular diseases Prophet 2074 65.892943 Brazil Cardiovascular diseases Prophet 2025 38.580863 Brazil Cardiovascular diseases Random Forest 2030 38.580863 Brazil Cardiovascular diseases Random Forest 2040 38.580863 Brazil Cardiovascular diseases Random Forest 2050 38.580863 Brazil Cardiovascular diseases Random Forest 2060 38.580863 Brazil Cardiovascular diseases Random Forest 2025 8.300000 Brazil Diabetes ARIMA 2030 8.300000 Brazil Diabetes ARIMA 2040 8.300000 Brazil Diabetes ARIMA 2050 8.300000 Brazil Diabetes ARIMA 2060 8.300000 Brazil Diabetes ARIMA 2074 8.300000 Brazil Diabetes ARIMA 2025 8.358204 Brazil Diabetes Prophet 2030 8.653546 Brazil Diabetes Prophet 2040 9.103103 Brazil Diabetes Prophet 2050 9.688902 Brazil Diabetes Prophet 
2060 10.138459 Brazil Diabetes Prophet 2074 10.931329 Brazil Diabetes Prophet 2025 8.296000 Brazil Diabetes Random Forest 2030 8.296000 Brazil Diabetes Random Forest 2040 8.296000 Brazil Diabetes Random Forest 2050 8.296000 Brazil Diabetes Random Forest 2060 8.296000 Brazil Diabetes Random Forest 2025 76.397587 Brazil Life expectancy ARIMA 2030 77.764180 Brazil Life expectancy ARIMA 2040 80.468134 Brazil Life expectancy ARIMA 2050 83.133577 Brazil Life expectancy ARIMA 2060 85.761056 Brazil Life expectancy ARIMA 2074 89.376778 Brazil Life expectancy ARIMA 2025 75.761316 Brazil Life expectancy Prophet 2030 76.587607 Brazil Life expectancy Prophet 2040 77.958666 Brazil Life expectancy Prophet 2050 79.411766 Brazil Life expectancy Prophet 2060 80.782826 Brazil Life expectancy Prophet 2074 82.800758 Brazil Life expectancy Prophet 2025 75.542449 Brazil Life expectancy Random Forest 2030 75.542449 Brazil Life expectancy Random Forest 2040 75.542449 Brazil Life expectancy Random Forest 2050 75.542449 Brazil Life expectancy Random Forest 2060 75.542449 Brazil Life expectancy Random Forest 2025 35.298232 Germany Cardiovascular diseases ARIMA 2030 35.298232 Germany Cardiovascular diseases ARIMA 2040 35.298232 Germany Cardiovascular diseases ARIMA 2050 35.298232 Germany Cardiovascular diseases ARIMA 2060 35.298232 Germany Cardiovascular diseases ARIMA 2074 35.298232 Germany Cardiovascular diseases ARIMA 2025 35.318850 Germany Cardiovascular diseases Prophet 2030 38.210525 Germany Cardiovascular diseases Prophet 2040 44.220292 Germany Cardiovascular diseases Prophet 2050 49.761779 Germany Cardiovascular diseases Prophet 2060 55.771547 Germany Cardiovascular diseases Prophet 2074 63.623285 Germany Cardiovascular diseases Prophet 2025 35.178712 Germany Cardiovascular diseases Random Forest 2030 35.178712 Germany Cardiovascular diseases Random Forest 2040 35.178712 Germany Cardiovascular diseases Random Forest 2050 35.178712 Germany Cardiovascular diseases Random Forest 2060 
35.178712 Germany Cardiovascular diseases Random Forest 2025 5.000000 Germany Diabetes ARIMA 2030 5.000000 Germany Diabetes ARIMA 2040 5.000000 Germany Diabetes ARIMA 2050 5.000000 Germany Diabetes ARIMA 2060 5.000000 Germany Diabetes ARIMA 2074 5.000000 Germany Diabetes ARIMA 2025 2.227197 Germany Diabetes Prophet 2030 1.512707 Germany Diabetes Prophet 2040 -0.100578 Germany Diabetes Prophet 2050 -1.477064 Germany Diabetes Prophet 2060 -3.090349 Germany Diabetes Prophet 2074 -5.064788 Germany Diabetes Prophet 2025 5.000000 Germany Diabetes Random Forest 2030 5.000000 Germany Diabetes Random Forest 2040 5.000000 Germany Diabetes Random Forest 2050 5.000000 Germany Diabetes Random Forest 2060 5.000000 Germany Diabetes Random Forest 2025 81.751537 Germany Life expectancy ARIMA 2030 82.685374 Germany Life expectancy ARIMA 2040 84.549813 Germany Life expectancy ARIMA 2050 86.409949 Germany Life expectancy ARIMA 2060 88.265791 Germany Life expectancy ARIMA 2074 90.856775 Germany Life expectancy ARIMA 2025 81.462485 Germany Life expectancy Prophet 2030 81.878330 Germany Life expectancy Prophet 2040 82.752834 Germany Life expectancy Prophet 2050 83.579200 Germany Life expectancy Prophet 2060 84.453704 Germany Life expectancy Prophet 2074 85.620243 Germany Life expectancy Prophet 2025 81.185123 Germany Life expectancy Random Forest 2030 81.185123 Germany Life expectancy Random Forest 2040 81.185123 Germany Life expectancy Random Forest 2050 81.185123 Germany Life expectancy Random Forest 2060 81.185123 Germany Life expectancy Random Forest 2025 293.292858 India Cardiovascular diseases ARIMA 2030 308.823918 India Cardiovascular diseases ARIMA 2040 332.428048 India Cardiovascular diseases ARIMA 2050 348.702213 India Cardiovascular diseases ARIMA 2060 359.922642 India Cardiovascular diseases ARIMA 2074 370.032480 India Cardiovascular diseases ARIMA 2025 340.302536 India Cardiovascular diseases Prophet 2030 388.835710 India Cardiovascular diseases Prophet 2040 493.542069 India 
Cardiovascular diseases Prophet 2050 589.911072 India Cardiovascular diseases Prophet 2060 694.617431 India Cardiovascular diseases Prophet 2074 831.201508 India Cardiovascular diseases Prophet 2025 284.381643 India Cardiovascular diseases Random Forest 2030 284.381643 India Cardiovascular diseases Random Forest 2040 284.381643 India Cardiovascular diseases Random Forest 2050 284.381643 India Cardiovascular diseases Random Forest 2060 284.381643 India Cardiovascular diseases Random Forest 2025 8.710015 India Diabetes ARIMA 2030 8.726867 India Diabetes ARIMA 2040 8.741538 India Diabetes ARIMA 2050 8.746115 India Diabetes ARIMA 2060 8.747543 India Diabetes ARIMA 2074 8.748063 India Diabetes ARIMA 2025 9.580568 India Diabetes Prophet 2030 10.104785 India Diabetes Prophet 2040 11.103703 India Diabetes Prophet 2050 12.159224 India Diabetes Prophet 2060 13.158143 India Diabetes Prophet 2074 14.624552 India Diabetes Prophet 2025 8.700000 India Diabetes Random Forest 2030 8.700000 India Diabetes Random Forest 2040 8.700000 India Diabetes Random Forest 2050 8.700000 India Diabetes Random Forest 2060 8.700000 India Diabetes Random Forest 2025 72.817223 India Life expectancy ARIMA 2030 74.853770 India Life expectancy ARIMA 2040 78.926815 India Life expectancy ARIMA 2050 82.999796 India Life expectancy ARIMA 2060 87.072712 India Life expectancy ARIMA 2074 92.774687 India Life expectancy ARIMA 2025 72.063189 India Life expectancy Prophet 2030 73.983538 India Life expectancy Prophet 2040 77.475605 India Life expectancy Prophet 2050 80.870622 India Life expectancy Prophet 2060 84.362689 India Life expectancy Prophet 2074 89.135122 India Life expectancy Prophet 2025 70.768360 India Life expectancy Random Forest 2030 70.768360 India Life expectancy Random Forest 2040 70.768360 India Life expectancy Random Forest 2050 70.768360 India Life expectancy Random Forest 2060 70.768360 India Life expectancy Random Forest 2025 72.145401 Indonesia Cardiovascular diseases ARIMA 2030 72.145401 
Indonesia Cardiovascular diseases ARIMA 2040 72.145401 Indonesia Cardiovascular diseases ARIMA 2050 72.145401 Indonesia Cardiovascular diseases ARIMA 2060 72.145401 Indonesia Cardiovascular diseases ARIMA 2074 72.145401 Indonesia Cardiovascular diseases ARIMA 2025 66.888434 Indonesia Cardiovascular diseases Prophet 2030 47.665286 Indonesia Cardiovascular diseases Prophet 2040 6.782155 Indonesia Cardiovascular diseases Prophet 2050 -29.613792 Indonesia Cardiovascular diseases Prophet 2060 -70.496922 Indonesia Cardiovascular diseases Prophet 2074 -122.348685 Indonesia Cardiovascular diseases Prophet 2025 72.403319 Indonesia Cardiovascular diseases Random Forest 2030 72.403319 Indonesia Cardiovascular diseases Random Forest 2040 72.403319 Indonesia Cardiovascular diseases Random Forest 2050 72.403319 Indonesia Cardiovascular diseases Random Forest 2060 72.403319 Indonesia Cardiovascular diseases Random Forest 2025 7.700000 Indonesia Diabetes ARIMA 2030 7.700000 Indonesia Diabetes ARIMA 2040 7.700000 Indonesia Diabetes ARIMA 2050 7.700000 Indonesia Diabetes ARIMA 2060 7.700000 Indonesia Diabetes ARIMA 2074 7.700000 Indonesia Diabetes ARIMA 2025 7.237657 Indonesia Diabetes Prophet 2030 7.399591 Indonesia Diabetes Prophet 2040 7.595297 Indonesia Diabetes Prophet 2050 7.930510 Indonesia Diabetes Prophet 2060 8.126216 Indonesia Diabetes Prophet 2074 8.567613 Indonesia Diabetes Prophet 2025 7.700000 Indonesia Diabetes Random Forest 2030 7.700000 Indonesia Diabetes Random Forest 2040 7.700000 Indonesia Diabetes Random Forest 2050 7.700000 Indonesia Diabetes Random Forest 2060 7.700000 Indonesia Diabetes Random Forest 2025 71.027487 Indonesia Life expectancy ARIMA 2030 71.027287 Indonesia Life expectancy ARIMA 2040 71.027287 Indonesia Life expectancy ARIMA 2050 71.027287 Indonesia Life expectancy ARIMA 2060 71.027287 Indonesia Life expectancy ARIMA 2074 71.027287 Indonesia Life expectancy ARIMA 2025 70.713068 Indonesia Life expectancy Prophet 2030 72.092003 Indonesia Life 
expectancy Prophet 2040 73.899635 Indonesia Life expectancy Prophet 2050 75.861300 Indonesia Life expectancy Prophet 2060 77.668931 Indonesia Life expectancy Prophet 2074 80.384456 Indonesia Life expectancy Prophet 2025 70.450001 Indonesia Life expectancy Random Forest 2030 70.450001 Indonesia Life expectancy Random Forest 2040 70.450001 Indonesia Life expectancy Random Forest 2050 70.450001 Indonesia Life expectancy Random Forest 2060 70.450001 Indonesia Life expectancy Random Forest 2025 41.789357 Japan Cardiovascular diseases ARIMA 2030 42.892306 Japan Cardiovascular diseases ARIMA 2040 44.523445 Japan Cardiovascular diseases ARIMA 2050 45.606548 Japan Cardiovascular diseases ARIMA 2060 46.325745 Japan Cardiovascular diseases ARIMA 2074 46.945887 Japan Cardiovascular diseases ARIMA 2025 36.754197 Japan Cardiovascular diseases Prophet 2030 39.971797 Japan Cardiovascular diseases Prophet 2040 46.540734 Japan Cardiovascular diseases Prophet 2050 52.803979 Japan Cardiovascular diseases Prophet 2060 59.372916 Japan Cardiovascular diseases Prophet 2074 68.202598 Japan Cardiovascular diseases Prophet 2025 40.597370 Japan Cardiovascular diseases Random Forest 2030 40.597370 Japan Cardiovascular diseases Random Forest 2040 40.597370 Japan Cardiovascular diseases Random Forest 2050 40.597370 Japan Cardiovascular diseases Random Forest 2060 40.597370 Japan Cardiovascular diseases Random Forest 2025 6.700000 Japan Diabetes ARIMA 2030 6.700000 Japan Diabetes ARIMA 2040 6.700000 Japan Diabetes ARIMA 2050 6.700000 Japan Diabetes ARIMA 2060 6.700000 Japan Diabetes ARIMA 2074 6.700000 Japan Diabetes ARIMA 2025 5.256912 Japan Diabetes Prophet 2030 4.951835 Japan Diabetes Prophet 2040 4.157396 Japan Diabetes Prophet 2050 3.573266 Japan Diabetes Prophet 2060 2.778828 Japan Diabetes Prophet 2074 1.918983 Japan Diabetes Prophet 2025 6.700000 Japan Diabetes Random Forest 2030 6.700000 Japan Diabetes Random Forest 2040 6.700000 Japan Diabetes Random Forest 2050 6.700000 Japan Diabetes 
Random Forest 2060 6.700000 Japan Diabetes Random Forest 2025 84.963514 Japan Life expectancy ARIMA 2030 85.580826 Japan Life expectancy ARIMA 2040 86.770762 Japan Life expectancy ARIMA 2050 87.903504 Japan Life expectancy ARIMA 2060 88.981802 Japan Life expectancy ARIMA 2074 90.404909 Japan Life expectancy ARIMA 2025 85.082724 Japan Life expectancy Prophet 2030 85.843020 Japan Life expectancy Prophet 2040 87.341249 Japan Life expectancy Prophet 2050 88.727508 Japan Life expectancy Prophet 2060 90.225737 Japan Life expectancy Prophet 2074 92.188893 Japan Life expectancy Prophet 2025 84.562189 Japan Life expectancy Random Forest 2030 84.562189 Japan Life expectancy Random Forest 2040 84.562189 Japan Life expectancy Random Forest 2050 84.562189 Japan Life expectancy Random Forest 2060 84.562189 Japan Life expectancy Random Forest 2025 3.497143 Kenya Cardiovascular diseases ARIMA 2030 3.497063 Kenya Cardiovascular diseases ARIMA 2040 3.497066 Kenya Cardiovascular diseases ARIMA 2050 3.497066 Kenya Cardiovascular diseases ARIMA 2060 3.497066 Kenya Cardiovascular diseases ARIMA 2074 3.497066 Kenya Cardiovascular diseases ARIMA 2025 2.537589 Kenya Cardiovascular diseases Prophet 2030 2.252614 Kenya Cardiovascular diseases Prophet 2040 1.607427 Kenya Cardiovascular diseases Prophet 2050 1.079396 Kenya Cardiovascular diseases Prophet 2060 0.434209 Kenya Cardiovascular diseases Prophet 2074 -0.328465 Kenya Cardiovascular diseases Prophet 2025 3.526365 Kenya Cardiovascular diseases Random Forest 2030 3.526365 Kenya Cardiovascular diseases Random Forest 2040 3.526365 Kenya Cardiovascular diseases Random Forest 2050 3.526365 Kenya Cardiovascular diseases Random Forest 2060 3.526365 Kenya Cardiovascular diseases Random Forest 2025 5.999969 Kenya Diabetes ARIMA 2030 5.999704 Kenya Diabetes ARIMA 2040 5.999765 Kenya Diabetes ARIMA 2050 5.999790 Kenya Diabetes ARIMA 2060 5.999800 Kenya Diabetes ARIMA 2074 5.999805 Kenya Diabetes ARIMA 2025 2.714910 Kenya Diabetes Prophet 2030 
2.109156 Kenya Diabetes Prophet 2040 0.755248 Kenya Diabetes Prophet 2050 -0.354044 Kenya Diabetes Prophet 2060 -1.707953 Kenya Diabetes Prophet 2074 -3.309884 Kenya Diabetes Prophet 2025 6.000000 Kenya Diabetes Random Forest 2030 6.000000 Kenya Diabetes Random Forest 2040 6.000000 Kenya Diabetes Random Forest 2050 6.000000 Kenya Diabetes Random Forest 2060 6.000000 Kenya Diabetes Random Forest 2025 64.457575 Kenya Life expectancy ARIMA 2030 66.188507 Kenya Life expectancy ARIMA 2040 68.657408 Kenya Life expectancy ARIMA 2050 70.216780 Kenya Life expectancy ARIMA 2060 71.201688 Kenya Life expectancy ARIMA 2074 72.002826 Kenya Life expectancy ARIMA 2025 63.881682 Kenya Life expectancy Prophet 2030 65.068676 Kenya Life expectancy Prophet 2040 67.162236 Kenya Life expectancy Prophet 2050 69.448219 Kenya Life expectancy Prophet 2060 71.541779 Kenya Life expectancy Prophet 2074 74.703670 Kenya Life expectancy Prophet 2025 63.447256 Kenya Life expectancy Random Forest 2030 63.447256 Kenya Life expectancy Random Forest 2040 63.447256 Kenya Life expectancy Random Forest 2050 63.447256 Kenya Life expectancy Random Forest 2060 63.447256 Kenya Life expectancy Random Forest 2025 22.989318 Mexico Cardiovascular diseases ARIMA 2030 24.997848 Mexico Cardiovascular diseases ARIMA 2040 28.424030 Mexico Cardiovascular diseases ARIMA 2050 31.190024 Mexico Cardiovascular diseases ARIMA 2060 33.423041 Mexico Cardiovascular diseases ARIMA 2074 35.845544 Mexico Cardiovascular diseases ARIMA 2025 23.806455 Mexico Cardiovascular diseases Prophet 2030 27.151329 Mexico Cardiovascular diseases Prophet 2040 34.453527 Mexico Cardiovascular diseases Prophet 2050 41.215192 Mexico Cardiovascular diseases Prophet 2060 48.517391 Mexico Cardiovascular diseases Prophet 2074 58.091828 Mexico Cardiovascular diseases Prophet 2025 22.001783 Mexico Cardiovascular diseases Random Forest 2030 22.001783 Mexico Cardiovascular diseases Random Forest 2040 22.001783 Mexico Cardiovascular diseases Random Forest 
2050 22.001783 Mexico Cardiovascular diseases Random Forest 2060 22.001783 Mexico Cardiovascular diseases Random Forest 2025 11.200000 Mexico Diabetes ARIMA 2030 11.200000 Mexico Diabetes ARIMA 2040 11.200000 Mexico Diabetes ARIMA 2050 11.200000 Mexico Diabetes ARIMA 2060 11.200000 Mexico Diabetes ARIMA 2074 11.200000 Mexico Diabetes ARIMA 2025 10.884619 Mexico Diabetes Prophet 2030 11.162304 Mexico Diabetes Prophet 2040 11.516252 Mexico Diabetes Prophet 2050 12.094992 Mexico Diabetes Prophet 2060 12.448939 Mexico Diabetes Prophet 2074 13.214218 Mexico Diabetes Prophet 2025 11.155000 Mexico Diabetes Random Forest 2030 11.155000 Mexico Diabetes Random Forest 2040 11.155000 Mexico Diabetes Random Forest 2050 11.155000 Mexico Diabetes Random Forest 2060 11.155000 Mexico Diabetes Random Forest 2025 74.853065 Mexico Life expectancy ARIMA 2030 74.849014 Mexico Life expectancy ARIMA 2040 74.849014 Mexico Life expectancy ARIMA 2050 74.849014 Mexico Life expectancy ARIMA 2060 74.849014 Mexico Life expectancy ARIMA 2074 74.849014 Mexico Life expectancy ARIMA 2025 73.028491 Mexico Life expectancy Prophet 2030 73.089136 Mexico Life expectancy Prophet 2040 72.342096 Mexico Life expectancy Prophet 2050 72.094169 Mexico Life expectancy Prophet 2060 71.347129 Mexico Life expectancy Prophet 2074 70.900208 Mexico Life expectancy Prophet 2025 73.796821 Mexico Life expectancy Random Forest 2030 73.796821 Mexico Life expectancy Random Forest 2040 73.796821 Mexico Life expectancy Random Forest 2050 73.796821 Mexico Life expectancy Random Forest 2060 73.796821 Mexico Life expectancy Random Forest 2025 17.992037 Nigeria Cardiovascular diseases ARIMA 2030 17.992039 Nigeria Cardiovascular diseases ARIMA 2040 17.992039 Nigeria Cardiovascular diseases ARIMA 2050 17.992039 Nigeria Cardiovascular diseases ARIMA 2060 17.992039 Nigeria Cardiovascular diseases ARIMA 2074 17.992039 Nigeria Cardiovascular diseases ARIMA 2025 18.712944 Nigeria Cardiovascular diseases Prophet 2030 20.746966 Nigeria 
Cardiovascular diseases Prophet 2040 25.124451 Nigeria Cardiovascular diseases Prophet 2050 29.203230 Nigeria Cardiovascular diseases Prophet 2060 33.580715 Nigeria Cardiovascular diseases Prophet 2074 39.350747 Nigeria Cardiovascular diseases Prophet 2025 17.869022 Nigeria Cardiovascular diseases Random Forest 2030 17.869022 Nigeria Cardiovascular diseases Random Forest 2040 17.869022 Nigeria Cardiovascular diseases Random Forest 2050 17.869022 Nigeria Cardiovascular diseases Random Forest 2060 17.869022 Nigeria Cardiovascular diseases Random Forest 2025 6.200000 Nigeria Diabetes ARIMA 2030 6.200000 Nigeria Diabetes ARIMA 2040 6.200000 Nigeria Diabetes ARIMA 2050 6.200000 Nigeria Diabetes ARIMA 2060 6.200000 Nigeria Diabetes ARIMA 2074 6.200000 Nigeria Diabetes ARIMA 2025 6.448366 Nigeria Diabetes Prophet 2030 6.784282 Nigeria Diabetes Prophet 2040 7.369781 Nigeria Diabetes Prophet 2050 8.062375 Nigeria Diabetes Prophet 2060 8.647875 Nigeria Diabetes Prophet 2074 9.596087 Nigeria Diabetes Prophet 2025 6.200000 Nigeria Diabetes Random Forest 2030 6.200000 Nigeria Diabetes Random Forest 2040 6.200000 Nigeria Diabetes Random Forest 2050 6.200000 Nigeria Diabetes Random Forest 2060 6.200000 Nigeria Diabetes Random Forest 2025 55.139161 Nigeria Life expectancy ARIMA 2030 56.511133 Nigeria Life expectancy ARIMA 2040 58.269731 Nigeria Life expectancy ARIMA 2050 59.226659 Nigeria Life expectancy ARIMA 2060 59.747365 Nigeria Life expectancy ARIMA 2074 60.103776 Nigeria Life expectancy ARIMA 2025 54.437559 Nigeria Life expectancy Prophet 2030 55.506037 Nigeria Life expectancy Prophet 2040 57.698935 Nigeria Life expectancy Prophet 2050 59.883102 Nigeria Life expectancy Prophet 2060 62.075999 Nigeria Life expectancy Prophet 2074 65.135579 Nigeria Life expectancy Prophet 2025 53.665291 Nigeria Life expectancy Random Forest 2030 53.665291 Nigeria Life expectancy Random Forest 2040 53.665291 Nigeria Life expectancy Random Forest 2050 53.665291 Nigeria Life expectancy Random 
Forest 2060 53.665291 Nigeria Life expectancy Random Forest 2025 92.088422 United States Cardiovascular diseases ARIMA 2030 92.088422 United States Cardiovascular diseases ARIMA 2040 92.088422 United States Cardiovascular diseases ARIMA 2050 92.088422 United States Cardiovascular diseases ARIMA 2060 92.088422 United States Cardiovascular diseases ARIMA 2074 92.088422 United States Cardiovascular diseases ARIMA 2025 85.782008 United States Cardiovascular diseases Prophet 2030 92.093667 United States Cardiovascular diseases Prophet 2040 105.312488 United States Cardiovascular diseases Prophet 2050 117.156631 United States Cardiovascular diseases Prophet 2060 130.375453 United States Cardiovascular diseases Prophet 2074 147.232188 United States Cardiovascular diseases Prophet 2025 91.479621 United States Cardiovascular diseases Random Forest 2030 91.479621 United States Cardiovascular diseases Random Forest 2040 91.479621 United States Cardiovascular diseases Random Forest 2050 91.479621 United States Cardiovascular diseases Random Forest 2060 91.479621 United States Cardiovascular diseases Random Forest 2025 7.303214 United States Diabetes ARIMA 2030 7.307561 United States Diabetes ARIMA 2040 7.309988 United States Diabetes ARIMA 2050 7.310374 United States Diabetes ARIMA 2060 7.310435 United States Diabetes ARIMA 2074 7.310446 United States Diabetes ARIMA 2025 7.825907 United States Diabetes Prophet 2030 8.173607 United States Diabetes Prophet 2040 8.813006 United States Diabetes Prophet 2050 9.505040 United States Diabetes Prophet 2060 10.144439 United States Diabetes Prophet 2074 11.102760 United States Diabetes Prophet 2025 7.300000 United States Diabetes Random Forest 2030 7.300000 United States Diabetes Random Forest 2040 7.300000 United States Diabetes Random Forest 2050 7.300000 United States Diabetes Random Forest 2060 7.300000 United States Diabetes Random Forest 2025 79.529912 United States Life expectancy ARIMA 2030 79.533542 United States Life expectancy 
ARIMA 2040 79.533542 United States Life expectancy ARIMA 2050 79.533542 United States Life expectancy ARIMA 2060 79.533542 United States Life expectancy ARIMA 2074 79.533542 United States Life expectancy ARIMA 2025 78.160042 United States Life expectancy Prophet 2030 78.306735 United States Life expectancy Prophet 2040 78.078542 United States Life expectancy Prophet 2050 78.144730 United States Life expectancy Prophet 2060 77.916538 United States Life expectancy Prophet 2074 77.950325 United States Life expectancy Prophet 2025 78.871911 United States Life expectancy Random Forest 2030 78.871911 United States Life expectancy Random Forest 2040 78.871911 United States Life expectancy Random Forest 2050 78.871911 United States Life expectancy Random Forest 2060 78.871911 United States Life expectancy Random Forest
# Visual comparison of ARIMA, Prophet and Random Forest
import seaborn as sns
import matplotlib.pyplot as plt

# Draw one figure per (country, target) pair; seaborn splits the rows of
# that pair into one line per value of the 'Model' column.
for country in selected_countries:
    for target in target_columns:
        country_rows = df_final_forecast_all['Country'] == country
        target_rows = df_final_forecast_all['Target'] == target
        df_plot = df_final_forecast_all[country_rows & target_rows]

        fig, ax = plt.subplots(figsize=(12, 6))
        sns.lineplot(data=df_plot, x='Year', y='Forecast', hue='Model', ax=ax)
        ax.set_title(f"{country} - {target} Forecast (2024–2074)")
        plt.show()
# Time-based data split: fit on everything up to and including
# train_end_year, validate on the three years that follow, and forecast
# from the first year after the validation window.
train_end_year = 2020
val_years = [train_end_year + offset for offset in (1, 2, 3)]  # 2021, 2022, 2023
forecast_years = [*range(val_years[-1] + 1, 2076)]  # 2024 through 2075 inclusive
## Actual and Forecast comparison 2021 - 2023 - ARIMA
# For every (country, target) series this cell:
#   1. fits ARIMA(1,1,1) on the rows with Year <= train_end_year,
#   2. predicts the validation years and scores the prediction against the
#      held-out actual values (MAE and RMSE),
#   3. predicts 2024-2074 from the same fitted model.
# It leaves behind `results` (list of forecast frames), `metrics` (list of
# dicts), `arima_val_all` (list of validation frames) and `df_metrics`.
print("Starting ARIMA training, validation, and forecasting...")
results = []  # store model outputs
metrics = []  # store evaluation metrics
arima_val_all = []

from sklearn.metrics import mean_squared_error, mean_absolute_error
from prophet import Prophet
from statsmodels.tsa.arima.model import ARIMA
from sklearn.ensemble import RandomForestRegressor
import pandas as pd

# Take the validation window from val_years so the prediction range and the
# rows selected into val_df cannot drift apart.
val_start, val_end = str(min(val_years)), str(max(val_years))

for country in selected_countries:
    print(f"\nProcessing country: {country}")
    df_country = df_forecast_ready[df_forecast_ready['Country'] == country].copy()

    for target in target_columns:
        if target not in df_country.columns:
            print(f"Skipping target {target} for country {country} (not in columns)")
            continue
        print(f" Processing target: {target}")

        train_df = df_country[df_country['Year'] <= train_end_year].copy()
        val_df = df_country[df_country['Year'].isin(val_years)].copy()

        # Prepare time series: index by a datetime built from the Year column.
        ts_train = train_df.set_index(pd.to_datetime(train_df['Year'], format='%Y'))[target].astype(float)
        print(f" Training data points: {len(ts_train)}")

        ### --- ARIMA ---
        # Best-effort per series: a failure is reported and the loop moves on.
        try:
            model_arima = ARIMA(ts_train, order=(1, 1, 1)).fit()

            # Forecast the validation years
            arima_val = model_arima.predict(start=val_start, end=val_end).reset_index()
            arima_val.columns = ['Year', 'Forecast']
            arima_val['Year'] = arima_val['Year'].dt.year

            # Align actual values; years absent from val_df become NaN.
            arima_val['Actual'] = (
                val_df.set_index('Year')[target].reindex(arima_val['Year']).values
            )
            arima_val['Model'] = 'ARIMA'
            arima_val['Target'] = target
            arima_val['Country'] = country

            # Evaluation. The sklearn metrics reject NaN input, so score only
            # the years that have an actual value. Otherwise a single gap
            # would raise here and the long-range forecast below would be
            # skipped along with the metrics.
            scored = arima_val.dropna(subset=['Actual'])
            if len(scored) < len(arima_val):
                print(f" Warning: Missing actual values in validation for {country} - {target}")
            if scored.empty:
                mae = rmse = float('nan')
            else:
                mae = mean_absolute_error(scored['Actual'], scored['Forecast'])
                rmse = mean_squared_error(scored['Actual'], scored['Forecast']) ** 0.5
            metrics.append({'Country': country, 'Target': target, 'Model': 'ARIMA', 'MAE': mae, 'RMSE': rmse})
            print(f" Validation MAE: {mae:.4f}, RMSE: {rmse:.4f}")

            # Forecast 2024–2074.
            # NOTE(review): forecast_years (defined with the split constants)
            # runs through 2075, while this horizon stops at 2074 - confirm
            # which end year is intended.
            arima_forecast = model_arima.predict(start='2024', end='2074').reset_index()
            arima_forecast.columns = ['Year', 'Forecast']
            arima_forecast['Year'] = arima_forecast['Year'].dt.year
            arima_forecast['Model'] = 'ARIMA'
            arima_forecast['Target'] = target
            arima_forecast['Country'] = country

            # Store both frames only after every step succeeded, so the two
            # lists stay aligned entry for entry.
            arima_val_all.append(arima_val)
            results.append(arima_forecast)
            print(f" Forecast for 2024-2074 done, points: {len(arima_forecast)}")
        except Exception as e:
            print(f"ARIMA failed for {country}-{target}: {e}")

print("\nAll done!")
print(f"Total results dataframes: {len(results)}")
print(f"Total metric entries: {len(metrics)}")

# Convert metrics list to DataFrame and print
df_metrics = pd.DataFrame(metrics)
print("\nValidation metrics summary:")
print(df_metrics)
Starting ARIMA training, validation, and forecasting...
Processing country: United States
Processing target: Life expectancy
Training data points: 71
Validation MAE: 1.6505, RMSE: 1.9969
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 1.1904, RMSE: 1.1904
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0075, RMSE: 0.0080
Forecast for 2024-2074 done, points: 51
Processing country: Germany
Processing target: Life expectancy
Training data points: 71
Validation MAE: 0.3551, RMSE: 0.4746
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 0.4339, RMSE: 0.4339
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0000, RMSE: 0.0000
Forecast for 2024-2074 done, points: 51
Processing country: Japan
Processing target: Life expectancy
Training data points: 71
Validation MAE: 0.5732, RMSE: 0.6387
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 1.5380, RMSE: 1.5477
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0000, RMSE: 0.0000
Forecast for 2024-2074 done, points: 51
Processing country: Brazil
Processing target: Life expectancy
Training data points: 71
Validation MAE: 2.4778, RMSE: 3.0096
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 1.8161, RMSE: 1.8195
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0000, RMSE: 0.0000
Forecast for 2024-2074 done, points: 51
Processing country: India
Processing target: Life expectancy
Training data points: 71
Validation MAE: 1.5416, RMSE: 1.9737
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 19.1265, RMSE: 19.6630
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0185, RMSE: 0.0197
Forecast for 2024-2074 done, points: 51
Processing country: Indonesia
Processing target: Life expectancy
Training data points: 71
Validation MAE: 1.8730, RMSE: 1.8872
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 8.4755, RMSE: 8.4866
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0000, RMSE: 0.0000
Forecast for 2024-2074 done, points: 51
Processing country: Nigeria
Processing target: Life expectancy
Training data points: 71
Validation MAE: 0.6316, RMSE: 0.7003
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 0.7163, RMSE: 0.7164
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0000, RMSE: 0.0000
Forecast for 2024-2074 done, points: 51
Processing country: Kenya
Processing target: Life expectancy
Training data points: 71
Validation MAE: 2.7616, RMSE: 3.2353
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 0.1218, RMSE: 0.1218
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0003, RMSE: 0.0004
Forecast for 2024-2074 done, points: 51
Processing country: Mexico
Processing target: Life expectancy
Training data points: 71
Validation MAE: 5.2414, RMSE: 6.2245
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 0.4802, RMSE: 0.5788
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0000, RMSE: 0.0000
Forecast for 2024-2074 done, points: 51
Processing country: Bangladesh
Processing target: Life expectancy
Training data points: 71
Validation MAE: 2.0438, RMSE: 2.3127
Forecast for 2024-2074 done, points: 51
Processing target: Cardiovascular diseases
Training data points: 71
Validation MAE: 1.1376, RMSE: 1.1756
Forecast for 2024-2074 done, points: 51
Processing target: Diabetes
Training data points: 71
Validation MAE: 0.0000, RMSE: 0.0000
Forecast for 2024-2074 done, points: 51
All done!
Total results dataframes: 30
Total metric entries: 30
Validation metrics summary:
Country Target Model MAE RMSE
0 United States Life expectancy ARIMA 1.650538 1.996910
1 United States Cardiovascular diseases ARIMA 1.190368 1.190369
2 United States Diabetes ARIMA 0.007537 0.007983
3 Germany Life expectancy ARIMA 0.355071 0.474573
4 Germany Cardiovascular diseases ARIMA 0.433923 0.433925
5 Germany Diabetes ARIMA 0.000000 0.000000
6 Japan Life expectancy ARIMA 0.573244 0.638746
7 Japan Cardiovascular diseases ARIMA 1.538012 1.547668
8 Japan Diabetes ARIMA 0.000000 0.000000
9 Brazil Life expectancy ARIMA 2.477791 3.009573
10 Brazil Cardiovascular diseases ARIMA 1.816065 1.819507
11 Brazil Diabetes ARIMA 0.000000 0.000000
12 India Life expectancy ARIMA 1.541640 1.973657
13 India Cardiovascular diseases ARIMA 19.126507 19.662985
14 India Diabetes ARIMA 0.018515 0.019744
15 Indonesia Life expectancy ARIMA 1.873008 1.887179
16 Indonesia Cardiovascular diseases ARIMA 8.475548 8.486563
17 Indonesia Diabetes ARIMA 0.000000 0.000000
18 Nigeria Life expectancy ARIMA 0.631595 0.700330
19 Nigeria Cardiovascular diseases ARIMA 0.716343 0.716350
20 Nigeria Diabetes ARIMA 0.000000 0.000000
21 Kenya Life expectancy ARIMA 2.761611 3.235337
22 Kenya Cardiovascular diseases ARIMA 0.121752 0.121752
23 Kenya Diabetes ARIMA 0.000323 0.000379
24 Mexico Life expectancy ARIMA 5.241353 6.224500
25 Mexico Cardiovascular diseases ARIMA 0.480166 0.578806
26 Mexico Diabetes ARIMA 0.000000 0.000000
27 Bangladesh Life expectancy ARIMA 2.043810 2.312728
28 Bangladesh Cardiovascular diseases ARIMA 1.137589 1.175582
29 Bangladesh Diabetes ARIMA 0.000033 0.000036
# ## Actual and Forecast comparison 2021 - 2023 - - Prophet - REVISED
# For every (country, target) series this cell fits Prophet on the rows with
# Year <= train_end_year, predicts every year from the first validation year
# to the last forecast year in one call, then splits that prediction into a
# validation part (scored with MAE / RMSE) and a forecast part.
# NOTE: `results`, `metrics` and `df_metrics` are rebound here, so values an
# earlier cell stored under those names are discarded.
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd

# Initialize lists to store results and metrics
results = []
metrics = []
prophet_val_all = []

# The prediction dates are the same for every series, so build them once:
# all years from the first validation year through the last forecast year.
future_years = list(range(val_years[0], forecast_years[-1] + 1))
future_df = pd.DataFrame({'ds': pd.to_datetime(future_years, format='%Y')})

# Loop through countries and target variables
for country in selected_countries:
    df_country = df_forecast_ready[df_forecast_ready['Country'] == country].copy()

    for target in target_columns:
        if target not in df_country.columns:
            continue

        # Split data into train and validation sets
        train_df = df_country[df_country['Year'] <= train_end_year].copy()  # train up to 2020
        val_df = df_country[df_country['Year'].isin(val_years)].copy()  # validation 2021-2023

        # Best-effort per series: a failure is reported and the loop moves on.
        try:
            # Prepare Prophet training dataframe (Prophet expects 'ds' / 'y')
            prophet_df = train_df[['Year', target]].rename(columns={'Year': 'ds', target: 'y'})
            prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%Y')

            # Fit Prophet model
            model_prophet = Prophet()
            model_prophet.fit(prophet_df)

            # Predict validation and forecast years together
            forecast_prophet = model_prophet.predict(future_df)[['ds', 'yhat']]
            forecast_prophet.columns = ['Year', 'Forecast']
            forecast_prophet['Year'] = forecast_prophet['Year'].dt.year

            # Validation predictions (2021-2023); years absent from val_df
            # get NaN as their actual value.
            prophet_val = forecast_prophet[forecast_prophet['Year'].isin(val_years)].copy()
            prophet_val['Actual'] = (
                val_df.set_index('Year')[target].reindex(prophet_val['Year']).values
            )

            # Add metadata
            prophet_val['Model'] = 'Prophet'
            prophet_val['Target'] = target
            prophet_val['Country'] = country

            # Calculate metrics for the validation period. The sklearn
            # metrics reject NaN input, so score only the years that have an
            # actual value. Otherwise a single gap would raise here and the
            # forecast for this series would be dropped as well.
            scored = prophet_val.dropna(subset=['Actual'])
            if len(scored) < len(prophet_val):
                print(f" Warning: Missing actual values in validation for {country} - {target}")
            if scored.empty:
                mae = rmse = float('nan')
            else:
                mae = mean_absolute_error(scored['Actual'], scored['Forecast'])
                rmse = mean_squared_error(scored['Actual'], scored['Forecast']) ** 0.5
            metrics.append({
                'Country': country,
                'Target': target,
                'Model': 'Prophet',
                'MAE': mae,
                'RMSE': rmse,
            })

            # Forecast 2024–2075
            prophet_forecast = forecast_prophet[forecast_prophet['Year'].isin(forecast_years)].copy()
            prophet_forecast['Model'] = 'Prophet'
            prophet_forecast['Target'] = target
            prophet_forecast['Country'] = country

            # Append forecast and validation results together so the two
            # lists stay aligned entry for entry.
            results.append(prophet_forecast)
            prophet_val_all.append(prophet_val)
        except Exception as e:
            print(f"Prophet failed for {country}-{target}: {e}")

# After looping, convert metrics list to DataFrame and show summary
df_metrics = pd.DataFrame(metrics)
print("\nProphet Model Validation Metrics:")
print(df_metrics)

# Optional: Save metrics to CSV
# df_metrics.to_csv('prophet_validation_metrics.csv', index=False)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/hui4l3oe.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/i_1t15tb.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=18183', 'data', 'file=/tmp/tmpfus6j9tk/hui4l3oe.json', 'init=/tmp/tmpfus6j9tk/i_1t15tb.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model_9q411uu/prophet_model-20250715004239.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:39 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:40 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/j29kb1bg.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/qie9n2h9.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=70061', 'data', 'file=/tmp/tmpfus6j9tk/j29kb1bg.json', 'init=/tmp/tmpfus6j9tk/qie9n2h9.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelk57t7iwc/prophet_model-20250715004240.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:40 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:41 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. 
Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/unj8sz_j.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/qofaymjw.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=15898', 'data', 'file=/tmp/tmpfus6j9tk/unj8sz_j.json', 'init=/tmp/tmpfus6j9tk/qofaymjw.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeld57jac4s/prophet_model-20250715004241.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:41 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:41 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/w7dpmdm6.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/h4yn7k5m.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23402', 'data', 'file=/tmp/tmpfus6j9tk/w7dpmdm6.json', 'init=/tmp/tmpfus6j9tk/h4yn7k5m.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelzjdrwp9a/prophet_model-20250715004242.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:42 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:42 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. 
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7vx18r4e.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/b2yv3w3d.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=51555', 'data', 'file=/tmp/tmpfus6j9tk/7vx18r4e.json', 'init=/tmp/tmpfus6j9tk/b2yv3w3d.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeldl9zgrb_/prophet_model-20250715004243.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:43 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:44 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/x8i848ye.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/z7fbd0f3.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=75071', 'data', 'file=/tmp/tmpfus6j9tk/x8i848ye.json', 'init=/tmp/tmpfus6j9tk/z7fbd0f3.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model7lj436kl/prophet_model-20250715004244.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:44 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:46 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. 
Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_urxxisl.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/kftsyh7l.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=59199', 'data', 'file=/tmp/tmpfus6j9tk/_urxxisl.json', 'init=/tmp/tmpfus6j9tk/kftsyh7l.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model41oi490f/prophet_model-20250715004246.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:46 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:47 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0ehllk0f.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lxlaxayx.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=89501', 'data', 'file=/tmp/tmpfus6j9tk/0ehllk0f.json', 'init=/tmp/tmpfus6j9tk/lxlaxayx.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeltt3ar5re/prophet_model-20250715004248.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:48 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:48 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2w6ikda6.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/6ejaq6k0.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=77282', 'data', 'file=/tmp/tmpfus6j9tk/2w6ikda6.json', 'init=/tmp/tmpfus6j9tk/6ejaq6k0.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model94fw5qy5/prophet_model-20250715004248.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:48 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:49 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ki1c5gq7.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/6ixnkcy5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=12603', 'data', 'file=/tmp/tmpfus6j9tk/ki1c5gq7.json', 'init=/tmp/tmpfus6j9tk/6ixnkcy5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelo89ixmq1/prophet_model-20250715004249.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:49 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:50 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/bapp8uno.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/vllua0px.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=61434', 'data', 'file=/tmp/tmpfus6j9tk/bapp8uno.json', 'init=/tmp/tmpfus6j9tk/vllua0px.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelut5mf18z/prophet_model-20250715004250.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:50 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:51 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/zdmzhgeh.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lz26ukxl.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=44433', 'data', 'file=/tmp/tmpfus6j9tk/zdmzhgeh.json', 'init=/tmp/tmpfus6j9tk/lz26ukxl.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelbfmnklqw/prophet_model-20250715004251.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:51 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:52 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/sa7earv0.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/53owm4dj.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=82566', 'data', 'file=/tmp/tmpfus6j9tk/sa7earv0.json', 'init=/tmp/tmpfus6j9tk/53owm4dj.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelyexy_2jm/prophet_model-20250715004252.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:52 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:53 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ci4sbd_r.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3cz2kp25.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=92238', 'data', 'file=/tmp/tmpfus6j9tk/ci4sbd_r.json', 'init=/tmp/tmpfus6j9tk/3cz2kp25.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelsy1xjcy2/prophet_model-20250715004253.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:53 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:54 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/alhvh0t1.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/r_n2ql6r.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=20146', 'data', 'file=/tmp/tmpfus6j9tk/alhvh0t1.json', 'init=/tmp/tmpfus6j9tk/r_n2ql6r.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelhosuskrm/prophet_model-20250715004255.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:55 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:56 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/q7ta4zql.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/j9j4f6mu.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=20280', 'data', 'file=/tmp/tmpfus6j9tk/q7ta4zql.json', 'init=/tmp/tmpfus6j9tk/j9j4f6mu.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelvuhpkt0f/prophet_model-20250715004256.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:56 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:56 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_czjjeug.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/rlo8kwgl.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=4335', 'data', 'file=/tmp/tmpfus6j9tk/_czjjeug.json', 'init=/tmp/tmpfus6j9tk/rlo8kwgl.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model3omvznwk/prophet_model-20250715004257.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:57 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:42:58 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/euxi0pab.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/d_lnsi7o.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=53015', 'data', 'file=/tmp/tmpfus6j9tk/euxi0pab.json', 'init=/tmp/tmpfus6j9tk/d_lnsi7o.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model63r36fxa/prophet_model-20250715004258.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:42:58 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:00 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ks8cvuxa.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/yekqq37x.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=48019', 'data', 'file=/tmp/tmpfus6j9tk/ks8cvuxa.json', 'init=/tmp/tmpfus6j9tk/yekqq37x.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelwaz9jleb/prophet_model-20250715004300.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:00 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:01 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/clps_hqg.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7wboj7a7.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=64533', 'data', 'file=/tmp/tmpfus6j9tk/clps_hqg.json', 'init=/tmp/tmpfus6j9tk/7wboj7a7.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelpschjta7/prophet_model-20250715004302.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:02 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:02 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/v69332z5.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/k_jah5tf.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=18847', 'data', 'file=/tmp/tmpfus6j9tk/v69332z5.json', 'init=/tmp/tmpfus6j9tk/k_jah5tf.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelml_pvf5h/prophet_model-20250715004303.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:03 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:03 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/dxzrzasr.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/a06351rk.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=12380', 'data', 'file=/tmp/tmpfus6j9tk/dxzrzasr.json', 'init=/tmp/tmpfus6j9tk/a06351rk.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelu1z7mh36/prophet_model-20250715004304.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:04 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:05 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/d4f0oulx.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fv5fxy7d.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=33746', 'data', 'file=/tmp/tmpfus6j9tk/d4f0oulx.json', 'init=/tmp/tmpfus6j9tk/fv5fxy7d.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelcz6ig6en/prophet_model-20250715004305.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:05 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:05 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/imq2qafg.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/jo9g3phw.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=99490', 'data', 'file=/tmp/tmpfus6j9tk/imq2qafg.json', 'init=/tmp/tmpfus6j9tk/jo9g3phw.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model5i9g_vxa/prophet_model-20250715004305.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:05 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:07 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/nfl0ylxo.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/tlw1crx5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=16722', 'data', 'file=/tmp/tmpfus6j9tk/nfl0ylxo.json', 'init=/tmp/tmpfus6j9tk/tlw1crx5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelt_ol7g18/prophet_model-20250715004307.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:07 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:08 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/pmgc8dp8.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/zppti26d.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=75306', 'data', 'file=/tmp/tmpfus6j9tk/pmgc8dp8.json', 'init=/tmp/tmpfus6j9tk/zppti26d.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeljmloscs3/prophet_model-20250715004308.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:08 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:09 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/70qwrq9c.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/rsj3sclo.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=87568', 'data', 'file=/tmp/tmpfus6j9tk/70qwrq9c.json', 'init=/tmp/tmpfus6j9tk/rsj3sclo.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelu8q1vuaq/prophet_model-20250715004310.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:10 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:10 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7p0_v2n3.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/oz4lbo96.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=41774', 'data', 'file=/tmp/tmpfus6j9tk/7p0_v2n3.json', 'init=/tmp/tmpfus6j9tk/oz4lbo96.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelynepoq0l/prophet_model-20250715004311.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:11 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:12 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. 
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/h0yfq3m8.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/eax6y6s0.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=18946', 'data', 'file=/tmp/tmpfus6j9tk/h0yfq3m8.json', 'init=/tmp/tmpfus6j9tk/eax6y6s0.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model2iyuo_b0/prophet_model-20250715004312.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:12 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:13 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ghf9svz5.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/6qjdxl_i.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=83581', 'data', 'file=/tmp/tmpfus6j9tk/ghf9svz5.json', 'init=/tmp/tmpfus6j9tk/6qjdxl_i.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model4vo1p5ba/prophet_model-20250715004313.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 00:43:13 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 00:43:14 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet Model Validation Metrics:
Country Target Model MAE RMSE
0 United States Life expectancy Prophet 1.258482 1.561422
1 United States Cardiovascular diseases Prophet 11.905792 11.974926
2 United States Diabetes Prophet 0.485264 0.489566
3 Germany Life expectancy Prophet 0.525293 0.612408
4 Germany Cardiovascular diseases Prophet 2.056124 2.125500
5 Germany Diabetes Prophet 2.756360 2.758175
6 Japan Life expectancy Prophet 0.493246 0.576474
7 Japan Cardiovascular diseases Prophet 7.661805 7.688441
8 Japan Diabetes Prophet 1.840223 1.841061
9 Brazil Life expectancy Prophet 1.966449 2.189554
10 Brazil Cardiovascular diseases Prophet 6.513924 6.547227
11 Brazil Diabetes Prophet 0.177966 0.186005
12 India Life expectancy Prophet 1.643531 2.475751
13 India Cardiovascular diseases Prophet 36.495881 37.420988
14 India Diabetes Prophet 0.825451 0.830592
15 Indonesia Life expectancy Prophet 0.997838 1.692886
16 Indonesia Cardiovascular diseases Prophet 7.145800 7.998086
17 Indonesia Diabetes Prophet 0.711375 0.712114
18 Nigeria Life expectancy Prophet 0.316993 0.369290
19 Nigeria Cardiovascular diseases Prophet 4.491838 4.498448
20 Nigeria Diabetes Prophet 0.127827 0.140798
21 Kenya Life expectancy Prophet 1.398121 1.670562
22 Kenya Cardiovascular diseases Prophet 0.932411 0.933468
23 Kenya Diabetes Prophet 3.478763 3.479734
24 Mexico Life expectancy Prophet 1.800713 2.428620
25 Mexico Cardiovascular diseases Prophet 0.680988 0.843693
26 Mexico Diabetes Prophet 0.798584 0.799705
27 Bangladesh Life expectancy Prophet 1.365206 1.676697
28 Bangladesh Cardiovascular diseases Prophet 6.977247 6.991238
29 Bangladesh Diabetes Prophet 2.987791 2.987844
# ## Actual and Forecast Comparison 2021-2023 - Random Forest
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
def create_lag_features(df, target, lags=(1,)):
    """Add lagged copies of ``target`` to ``df`` as ``lag_<k>`` columns.

    Args:
        df: DataFrame to extend. It is modified in place: one new column is
            written to it per requested lag.
        target: Name of the column to lag.
        lags: Iterable of row offsets, each passed to ``Series.shift``.
            Defaults to a single one-row lag. (An immutable tuple is used
            as the default so the default can never be shared/mutated
            between calls.)

    Returns:
        The same ``df`` object that was passed in, now carrying a
        ``lag_<k>`` column for every ``k`` in ``lags``.
    """
    for lag in lags:
        # shift() moves values down by `lag` rows, leaving NaN in the rows
        # that have no earlier observation to borrow from.
        df[f'lag_{lag}'] = df[target].shift(lag)
    return df
# Random Forest with a single lag-1 feature, fitted separately for every
# (country, target) series: validate on `val_years`, then forecast the
# `forecast_years` horizon recursively (each prediction becomes the next
# step's lag_1 input).
results = []     # one DataFrame of future forecasts per series
metrics = []     # one dict of validation error metrics per series
rf_val_all = []  # one DataFrame of validation predictions per series

feature_cols = ['lag_1']
# The horizon is identical for every series, so build it once.
forecast_years_list = list(range(forecast_years[0], forecast_years[-1] + 1))

for country in selected_countries:
    # Chronological order is required for shift()-based lag features.
    df_country = (
        df_forecast_ready[df_forecast_ready['Country'] == country]
        .sort_values('Year')
        .reset_index(drop=True)
    )

    for target in target_columns:
        if target not in df_country.columns:
            continue

        # Build the lag feature on a per-target copy. Re-using (and
        # re-assigning) the country frame here would drop one more leading
        # row for every target processed, silently shrinking the training
        # window of later targets.
        df_target = create_lag_features(df_country.copy(), target, lags=[1])
        df_target = df_target.dropna(subset=['lag_1']).reset_index(drop=True)

        # Split into train and validation
        train_df = df_target[df_target['Year'] <= train_end_year].copy()
        val_df = df_target[df_target['Year'].isin(val_years)].copy()

        X_train = train_df[feature_cols]
        y_train = train_df[target]
        X_val = val_df[feature_cols]
        y_val = val_df[target]

        try:
            # Train Random Forest model
            rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
            rf_model.fit(X_train, y_train)

            # Predict on validation set
            val_preds = rf_model.predict(X_val)

            # Calculate metrics
            mae = mean_absolute_error(y_val, val_preds)
            rmse = mean_squared_error(y_val, val_preds) ** 0.5
            metrics.append({
                'Country': country,
                'Target': target,
                'Model': 'RandomForest',
                'MAE': mae,
                'RMSE': rmse
            })

            # Prepare validation results for concatenation later
            val_df_rf = val_df[['Year', target]].copy()
            val_df_rf['Forecast'] = val_preds
            val_df_rf['Actual'] = val_df_rf[target]
            val_df_rf['Model'] = 'RandomForest'
            val_df_rf['Target'] = target
            val_df_rf['Country'] = country
            rf_val_all.append(
                val_df_rf[['Year', 'Actual', 'Forecast', 'Model', 'Target', 'Country']]
            )

            # Seed the recursion with the most recent observed value
            # (train + validation rows combined).
            combined_df = (
                pd.concat([train_df, val_df], ignore_index=True)
                .sort_values('Year')
                .reset_index(drop=True)
            )
            last_known_year = combined_df['Year'].max()
            lag_1_val = combined_df.loc[
                combined_df['Year'] == last_known_year, target
            ].values[0]

            # Forecast future years recursively
            forecast_records = []
            for year in forecast_years_list:
                X_pred = pd.DataFrame({'lag_1': [lag_1_val]})
                pred = rf_model.predict(X_pred)[0]
                forecast_records.append({
                    'Year': year,
                    'Forecast': pred,
                    'Model': 'RandomForest',
                    'Target': target,
                    'Country': country
                })
                # Feed this prediction back in as next year's lag.
                lag_1_val = pred

            results.append(pd.DataFrame(forecast_records))
        except Exception as e:
            # Best-effort: report the failing series and carry on with the rest.
            print(f"Random Forest failed for {country}-{target}: {e}")

# After looping through countries and targets
df_metrics_rf = pd.DataFrame(metrics)
print("Random Forest Validation Metrics Summary:")
print(df_metrics_rf)
# Optional: save metrics to CSV
# df_metrics_rf.to_csv('random_forest_metrics_summary.csv', index=False)
Random Forest Validation Metrics Summary:
Country Target Model MAE \
0 United States Life expectancy RandomForest 1.072507e+00
1 United States Cardiovascular diseases RandomForest 2.428093e+00
2 United States Diabetes RandomForest 7.993606e-15
3 Germany Life expectancy RandomForest 4.278093e-01
4 Germany Cardiovascular diseases RandomForest 1.222081e+00
5 Germany Diabetes RandomForest 0.000000e+00
6 Japan Life expectancy RandomForest 3.442503e-01
7 Japan Cardiovascular diseases RandomForest 2.097032e+00
8 Japan Diabetes RandomForest 4.440892e-15
9 Brazil Life expectancy RandomForest 1.296734e+00
10 Brazil Cardiovascular diseases RandomForest 2.116450e+00
11 Brazil Diabetes RandomForest 1.243450e-14
12 India Life expectancy RandomForest 2.957347e+00
13 India Cardiovascular diseases RandomForest 2.621741e+00
14 India Diabetes RandomForest 1.598721e-14
15 Indonesia Life expectancy RandomForest 2.251181e+00
16 Indonesia Cardiovascular diseases RandomForest 4.044649e+00
17 Indonesia Diabetes RandomForest 9.769963e-15
18 Nigeria Life expectancy RandomForest 1.009518e+00
19 Nigeria Cardiovascular diseases RandomForest 1.532458e+00
20 Nigeria Diabetes RandomForest 8.881784e-16
21 Kenya Life expectancy RandomForest 1.411552e+00
22 Kenya Cardiovascular diseases RandomForest 6.406522e-02
23 Kenya Diabetes RandomForest 0.000000e+00
24 Mexico Life expectancy RandomForest 1.982940e+00
25 Mexico Cardiovascular diseases RandomForest 1.750655e+00
26 Mexico Diabetes RandomForest 2.131628e-14
27 Bangladesh Life expectancy RandomForest 2.154439e+00
28 Bangladesh Cardiovascular diseases RandomForest 4.827035e-01
29 Bangladesh Diabetes RandomForest 1.776357e-14
RMSE
0 1.102994e+00
1 2.738661e+00
2 7.993606e-15
3 4.520784e-01
4 1.229778e+00
5 0.000000e+00
6 4.174739e-01
7 2.097032e+00
8 4.440892e-15
9 1.372957e+00
10 2.116839e+00
11 1.243450e-14
12 3.104097e+00
13 2.621741e+00
14 1.598721e-14
15 2.382435e+00
16 4.990321e+00
17 9.769963e-15
18 1.091537e+00
19 1.532458e+00
20 8.881784e-16
21 1.548049e+00
22 6.407365e-02
23 0.000000e+00
24 2.435678e+00
25 1.750655e+00
26 2.131628e-14
27 2.345411e+00
28 4.827035e-01
29 1.776357e-14
# Plot comparison for 4 countries
import matplotlib.pyplot as plt
import seaborn as sns
# Which countries / validation years appear in the comparison figure
countries_to_plot = ['United States', 'Mexico', 'India', 'Japan']
years_to_plot = [2021, 2022, 2023]

# Stack the per-model validation frames into one long table
per_model_frames = [
    pd.concat(frames, ignore_index=True)
    for frames in (arima_val_all, prophet_val_all, rf_val_all)
]
val_results = pd.concat(per_model_frames, ignore_index=True)
val_results['Model'] = val_results['Model'].replace({'RandomForest': 'Random Forest'})

# Keep only the requested countries and years ...
in_scope = (
    val_results['Country'].isin(countries_to_plot)
    & val_results['Year'].isin(years_to_plot)
)
plot_df = val_results[in_scope].copy()

# ... and a single target variable
target_of_interest = 'Cardiovascular diseases'
plot_df = plot_df[plot_df['Target'] == target_of_interest]

# Seaborn styling for every panel
sns.set(style="whitegrid")

# One panel per country (2 x 2 grid, shared y-axis): the actual series in
# black plus one forecast line per model
fig, axs = plt.subplots(2, 2, figsize=(16, 10), sharey=True)
axs = axs.flatten()

for ax, country in zip(axs, countries_to_plot):
    country_data = plot_df[plot_df['Country'] == country]

    # Every model repeats the same actual values, so de-duplicate them
    # before drawing the reference line
    actual_data = country_data[['Year', 'Actual']].drop_duplicates()
    ax.plot(
        actual_data['Year'],
        actual_data['Actual'],
        label='Actual',
        color='black',
        marker='o',
    )

    # Forecast line for each model present in this country's rows
    for model in country_data['Model'].unique():
        model_data = country_data[country_data['Model'] == model]
        ax.plot(
            model_data['Year'],
            model_data['Forecast'],
            label=f'Forecast ({model})',
            marker='x',
        )

    ax.set(
        title=f'{country} - Actual vs Predicted ({target_of_interest})',
        xlabel='Year',
        ylabel='Value',
    )
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()
# ## Metric Summary
# Metric summary
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Fit ARIMA, Prophet and Random Forest for every (country, target) series,
# score each on the validation years, and collect validation predictions,
# future forecasts and error metrics.

# Define your split years
train_end_year = 2020
val_years = [2021, 2022, 2023]
forecast_years = list(range(2024, 2076))


def _mae_rmse(actual, forecast):
    """Return ``(MAE, RMSE)`` for paired actual / forecast values.

    Every model goes through this one helper so its metrics are always
    computed from its own validation frame.
    """
    mae = mean_absolute_error(actual, forecast)
    rmse = mean_squared_error(actual, forecast) ** 0.5
    return mae, rmse


# Initialize containers
arima_val_all = []
prophet_val_all = []
rf_val_all = []
arima_forecast_all = []
prophet_forecast_all = []
rf_forecast_all = []
metrics = []

for country in selected_countries:
    df_country = df_forecast_ready[df_forecast_ready['Country'] == country].copy()

    for target in target_columns:
        if target not in df_country.columns:
            print(f"Skipping {target} for {country} — target not in dataframe columns.")
            continue

        # Prepare train, val, forecast splits
        train_df = df_country[df_country['Year'] <= train_end_year].copy()
        val_df = df_country[df_country['Year'].isin(val_years)].copy()
        # Rows for the forecast years; their feature columns are the
        # Random Forest inputs for the future horizon.
        forecast_df = df_country[df_country['Year'].isin(forecast_years)].copy()

        print(f"\nProcessing {country} - {target}")
        print(f"Train shape: {train_df.shape}, Val shape: {val_df.shape}, Forecast shape: {forecast_df.shape}")

        # --- ARIMA ---
        try:
            ts_train = train_df.set_index(
                pd.to_datetime(train_df['Year'], format='%Y')
            )[target].astype(float)
            model_arima = ARIMA(ts_train, order=(1, 1, 1)).fit()

            arima_val = model_arima.predict(start='2021', end='2023').reset_index()
            arima_val.columns = ['Year', 'Forecast']
            arima_val['Year'] = arima_val['Year'].dt.year
            # Align actuals to the predicted years by label, not by position.
            arima_val['Actual'] = val_df.set_index('Year')[target].reindex(arima_val['Year']).values
            arima_val['Model'] = 'ARIMA'
            arima_val['Target'] = target
            arima_val['Country'] = country
            arima_val_all.append(arima_val)

            # Score ARIMA on its own predictions (previously the RMSE was
            # taken from the Prophet frame of an earlier iteration).
            mae, rmse = _mae_rmse(arima_val['Actual'], arima_val['Forecast'])
            metrics.append({'Country': country, 'Target': target, 'Model': 'ARIMA', 'MAE': mae, 'RMSE': rmse})
            print(f"ARIMA metrics: MAE={mae:.4f}, RMSE={rmse:.4f}")

            arima_forecast = model_arima.predict(start='2024', end='2075').reset_index()
            arima_forecast.columns = ['Year', 'Forecast']
            arima_forecast['Year'] = arima_forecast['Year'].dt.year
            arima_forecast['Model'] = 'ARIMA'
            arima_forecast['Target'] = target
            arima_forecast['Country'] = country
            arima_forecast_all.append(arima_forecast)
        except Exception as e:
            # Best-effort: report and continue with the other models.
            print(f"ARIMA failed for {country}-{target}: {e}")

        # --- Prophet ---
        try:
            prophet_df = train_df[['Year', target]].rename(columns={'Year': 'ds', target: 'y'})
            prophet_df['ds'] = pd.to_datetime(prophet_df['ds'], format='%Y')
            model_prophet = Prophet()
            model_prophet.fit(prophet_df)

            # One prediction pass covers both validation and forecast years;
            # it is split by year below.
            future_df = pd.DataFrame({'ds': pd.to_datetime(list(range(2021, 2076)), format='%Y')})
            forecast_prophet = model_prophet.predict(future_df)[['ds', 'yhat']]
            forecast_prophet.columns = ['Year', 'Forecast']
            forecast_prophet['Year'] = forecast_prophet['Year'].dt.year

            prophet_val = forecast_prophet[forecast_prophet['Year'].isin(val_years)].copy()
            prophet_val['Actual'] = val_df.set_index('Year')[target].reindex(prophet_val['Year']).values
            prophet_val['Model'] = 'Prophet'
            prophet_val['Target'] = target
            prophet_val['Country'] = country
            prophet_val_all.append(prophet_val)

            mae, rmse = _mae_rmse(prophet_val['Actual'], prophet_val['Forecast'])
            metrics.append({'Country': country, 'Target': target, 'Model': 'Prophet', 'MAE': mae, 'RMSE': rmse})
            print(f"Prophet metrics: MAE={mae:.4f}, RMSE={rmse:.4f}")

            prophet_forecast = forecast_prophet[forecast_prophet['Year'].isin(forecast_years)].copy()
            prophet_forecast['Model'] = 'Prophet'
            prophet_forecast['Target'] = target
            prophet_forecast['Country'] = country
            prophet_forecast_all.append(prophet_forecast)
        except Exception as e:
            print(f"Prophet failed for {country}-{target}: {e}")

        # --- Random Forest ---
        try:
            features = selected_features_dict.get(target, [])
            features_available = [f for f in features if f in df_country.columns]
            if not features_available:
                print(f"No features available for Random Forest on {country}-{target}, skipping.")
                continue

            X_train = train_df[features_available]
            y_train = train_df[target]
            X_val = val_df[features_available]
            y_val = val_df[target]
            X_forecast = forecast_df[features_available]

            model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
            model_rf.fit(X_train, y_train)

            rf_val_pred = model_rf.predict(X_val)
            rf_val = pd.DataFrame({
                'Year': val_df['Year'].values,
                'Forecast': rf_val_pred,
                'Actual': val_df[target].values,
                'Model': 'RandomForest',
                'Target': target,
                'Country': country
            })
            rf_val_all.append(rf_val)

            # Score Random Forest on its own predictions (previously the
            # RMSE was copied from the Prophet validation frame).
            mae, rmse = _mae_rmse(rf_val['Actual'], rf_val['Forecast'])
            metrics.append({'Country': country, 'Target': target, 'Model': 'RandomForest', 'MAE': mae, 'RMSE': rmse})
            print(f"Random Forest metrics: MAE={mae:.4f}, RMSE={rmse:.4f}")

            rf_forecast_pred = model_rf.predict(X_forecast)
            rf_forecast = pd.DataFrame({
                'Year': forecast_df['Year'].values,
                'Forecast': rf_forecast_pred,
                'Model': 'RandomForest',
                'Target': target,
                'Country': country
            })
            rf_forecast_all.append(rf_forecast)
        except Exception as e:
            print(f"Random Forest failed for {country}-{target}: {e}")

# Combine all validation results and forecasts
try:
    val_results = pd.concat([
        pd.concat(arima_val_all, ignore_index=True),
        pd.concat(prophet_val_all, ignore_index=True),
        pd.concat(rf_val_all, ignore_index=True)
    ], ignore_index=True)
    results = pd.concat([
        pd.concat(arima_forecast_all, ignore_index=True),
        pd.concat(prophet_forecast_all, ignore_index=True),
        pd.concat(rf_forecast_all, ignore_index=True)
    ], ignore_index=True)
    metrics_df = pd.DataFrame(metrics)
    print("\nFinal Metrics Summary:")
    print(metrics_df)
except Exception as e:
    # pd.concat raises on an empty list, i.e. when a model failed for every series.
    print(f"Error combining results: {e}")
Processing United States - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lp2xr3es.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lwqopb8d.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=55104', 'data', 'file=/tmp/tmpfus6j9tk/lp2xr3es.json', 'init=/tmp/tmpfus6j9tk/lwqopb8d.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model7sdvhfdr/prophet_model-20250715013244.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:32:44 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=1.6505, RMSE=3.4797
01:32:45 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=1.2585, RMSE=1.5614 Random Forest metrics: MAE=0.9315, RMSE=1.5614 Processing United States - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7dqv8po0.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/dbhwgbpp.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=42318', 'data', 'file=/tmp/tmpfus6j9tk/7dqv8po0.json', 'init=/tmp/tmpfus6j9tk/dbhwgbpp.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model7_zxx1ja/prophet_model-20250715013246.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:32:46 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=1.1904, RMSE=1.5614
01:32:46 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=11.9058, RMSE=11.9749 Random Forest metrics: MAE=4.2320, RMSE=11.9749 Processing United States - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/m2y3n56p.json
ARIMA metrics: MAE=0.0075, RMSE=11.9749
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/44hwzitv.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=30025', 'data', 'file=/tmp/tmpfus6j9tk/m2y3n56p.json', 'init=/tmp/tmpfus6j9tk/44hwzitv.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modela3ocohtf/prophet_model-20250715013249.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:32:49 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 01:32:50 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.4853, RMSE=0.4896 Random Forest metrics: MAE=0.0047, RMSE=0.4896 Processing Germany - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3sz40meh.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/zdalmpb5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=28363', 'data', 'file=/tmp/tmpfus6j9tk/3sz40meh.json', 'init=/tmp/tmpfus6j9tk/zdalmpb5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model6e_5cj13/prophet_model-20250715013252.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:32:52 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=0.3551, RMSE=0.4896
01:32:53 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.5253, RMSE=0.6124 Random Forest metrics: MAE=0.3323, RMSE=0.6124 Processing Germany - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/bzkp9vmx.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3uywljos.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=82198', 'data', 'file=/tmp/tmpfus6j9tk/bzkp9vmx.json', 'init=/tmp/tmpfus6j9tk/3uywljos.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelz_c3ydql/prophet_model-20250715013255.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:32:55 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=0.4339, RMSE=0.6124
01:32:57 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=2.0561, RMSE=2.1255 Random Forest metrics: MAE=0.8260, RMSE=2.1255 Processing Germany - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/k7b392mt.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7odsjvc7.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=87263', 'data', 'file=/tmp/tmpfus6j9tk/k7b392mt.json', 'init=/tmp/tmpfus6j9tk/7odsjvc7.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelj1zwal7u/prophet_model-20250715013259.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:32:59 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=0.0000, RMSE=2.1255
01:33:00 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=2.7564, RMSE=2.7582 Random Forest metrics: MAE=0.0000, RMSE=2.7582 Processing Japan - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/yhzhe44m.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/awb2xecp.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=37972', 'data', 'file=/tmp/tmpfus6j9tk/yhzhe44m.json', 'init=/tmp/tmpfus6j9tk/awb2xecp.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelxuowyewa/prophet_model-20250715013303.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:03 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=0.5732, RMSE=2.7582
01:33:03 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.4932, RMSE=0.5765 Random Forest metrics: MAE=0.2536, RMSE=0.5765 Processing Japan - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0dd6bo06.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/v09pw08m.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=33836', 'data', 'file=/tmp/tmpfus6j9tk/0dd6bo06.json', 'init=/tmp/tmpfus6j9tk/v09pw08m.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0jq4m38_/prophet_model-20250715013306.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:06 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=1.5380, RMSE=0.5765
01:33:08 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=7.6618, RMSE=7.6884 Random Forest metrics: MAE=2.5516, RMSE=7.6884 Processing Japan - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/4lda9z4a.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/dgn96yeh.json
ARIMA metrics: MAE=0.0000, RMSE=7.6884
DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=34795', 'data', 'file=/tmp/tmpfus6j9tk/4lda9z4a.json', 'init=/tmp/tmpfus6j9tk/dgn96yeh.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model4s_j3mqt/prophet_model-20250715013310.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:10 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 01:33:12 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=1.8402, RMSE=1.8411 Random Forest metrics: MAE=0.0000, RMSE=1.8411 Processing Brazil - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/zx2solcy.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7dnc84x8.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=6853', 'data', 'file=/tmp/tmpfus6j9tk/zx2solcy.json', 'init=/tmp/tmpfus6j9tk/7dnc84x8.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelb61jrxw_/prophet_model-20250715013313.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:13 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=2.4778, RMSE=1.8411
01:33:14 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=1.9664, RMSE=2.1896 Random Forest metrics: MAE=0.9191, RMSE=2.1896 Processing Brazil - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/sf1qeort.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ag2te2k8.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=73017', 'data', 'file=/tmp/tmpfus6j9tk/sf1qeort.json', 'init=/tmp/tmpfus6j9tk/ag2te2k8.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0xtt_nse/prophet_model-20250715013316.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:16 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=1.8161, RMSE=2.1896
01:33:16 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=6.5139, RMSE=6.5472 Random Forest metrics: MAE=2.3173, RMSE=6.5472 Processing Brazil - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/akzbt40g.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/m11o07d5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=47290', 'data', 'file=/tmp/tmpfus6j9tk/akzbt40g.json', 'init=/tmp/tmpfus6j9tk/m11o07d5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model9qg4lv1o/prophet_model-20250715013317.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:17 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=0.0000, RMSE=6.5472
01:33:18 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.1780, RMSE=0.1860 Random Forest metrics: MAE=0.0293, RMSE=0.1860 Processing India - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/utdpvitf.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/cb80ulgt.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=91911', 'data', 'file=/tmp/tmpfus6j9tk/utdpvitf.json', 'init=/tmp/tmpfus6j9tk/cb80ulgt.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelhx191ucv/prophet_model-20250715013319.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:19 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=1.5416, RMSE=0.1860
01:33:20 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=1.6435, RMSE=2.4758 Random Forest metrics: MAE=2.7177, RMSE=2.4758 Processing India - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/pki5n9rk.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/slkj_zeq.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=3742', 'data', 'file=/tmp/tmpfus6j9tk/pki5n9rk.json', 'init=/tmp/tmpfus6j9tk/slkj_zeq.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelos4p9bwh/prophet_model-20250715013323.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:23 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=19.1265, RMSE=2.4758
01:33:24 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=36.4959, RMSE=37.4210 Random Forest metrics: MAE=12.9403, RMSE=37.4210 Processing India - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lmsiao1r.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/23h9q0kv.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23117', 'data', 'file=/tmp/tmpfus6j9tk/lmsiao1r.json', 'init=/tmp/tmpfus6j9tk/23h9q0kv.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelzc9wfk9k/prophet_model-20250715013325.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:25 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=0.0185, RMSE=37.4210
01:33:26 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.8255, RMSE=0.8306 Random Forest metrics: MAE=0.0350, RMSE=0.8306 Processing Indonesia - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/lfjkf38d.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/ofcfzhc1.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=47535', 'data', 'file=/tmp/tmpfus6j9tk/lfjkf38d.json', 'init=/tmp/tmpfus6j9tk/ofcfzhc1.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model_fqirytp/prophet_model-20250715013328.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:28 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=1.8730, RMSE=0.8306
01:33:28 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.9978, RMSE=1.6929 Random Forest metrics: MAE=1.7877, RMSE=1.6929 Processing Indonesia - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/e0oyc6ue.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/_dfggz84.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=6197', 'data', 'file=/tmp/tmpfus6j9tk/e0oyc6ue.json', 'init=/tmp/tmpfus6j9tk/_dfggz84.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model0wjif5fm/prophet_model-20250715013330.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:30 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=8.4755, RMSE=1.6929
01:33:30 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=7.1458, RMSE=7.9981
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/1pylhlao.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3m2qgyql.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=79342', 'data', 'file=/tmp/tmpfus6j9tk/1pylhlao.json', 'init=/tmp/tmpfus6j9tk/3m2qgyql.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelysu6ay46/prophet_model-20250715013331.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:31 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
Random Forest metrics: MAE=7.3518, RMSE=7.9981 Processing Indonesia - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54) ARIMA metrics: MAE=0.0000, RMSE=7.9981
01:33:32 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.7114, RMSE=0.7121 Random Forest metrics: MAE=0.0090, RMSE=0.7121 Processing Nigeria - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/hh1ew7tu.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/8uj6t3mg.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=35486', 'data', 'file=/tmp/tmpfus6j9tk/hh1ew7tu.json', 'init=/tmp/tmpfus6j9tk/8uj6t3mg.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelaiihawkz/prophet_model-20250715013333.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:33 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=0.6316, RMSE=0.7121
01:33:34 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.3170, RMSE=0.3693 Random Forest metrics: MAE=6.3654, RMSE=0.3693 Processing Nigeria - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
ARIMA metrics: MAE=0.7163, RMSE=0.3693
DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/tgz_dxm5.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/0rl8o_po.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=90553', 'data', 'file=/tmp/tmpfus6j9tk/tgz_dxm5.json', 'init=/tmp/tmpfus6j9tk/0rl8o_po.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelt9o2cdoe/prophet_model-20250715013336.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:36 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 01:33:37 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=4.4918, RMSE=4.4984 Random Forest metrics: MAE=1.7240, RMSE=4.4984 Processing Nigeria - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54) ARIMA metrics: MAE=0.0000, RMSE=4.4984
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/5_3v6uov.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/78c1ksvl.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=76930', 'data', 'file=/tmp/tmpfus6j9tk/5_3v6uov.json', 'init=/tmp/tmpfus6j9tk/78c1ksvl.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modeln_42r3nw/prophet_model-20250715013338.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:38 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 01:33:39 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.1278, RMSE=0.1408 Random Forest metrics: MAE=0.0040, RMSE=0.1408 Processing Kenya - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/bj5px6st.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/snqbvejs.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=7073', 'data', 'file=/tmp/tmpfus6j9tk/bj5px6st.json', 'init=/tmp/tmpfus6j9tk/snqbvejs.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modele3r0jpg1/prophet_model-20250715013340.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:40 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=2.7616, RMSE=0.1408
01:33:41 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=1.3981, RMSE=1.6706 Random Forest metrics: MAE=0.8407, RMSE=1.6706 Processing Kenya - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54) ARIMA metrics: MAE=0.1218, RMSE=1.6706
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/7g5ipfzp.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/mdx5ronw.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=39191', 'data', 'file=/tmp/tmpfus6j9tk/7g5ipfzp.json', 'init=/tmp/tmpfus6j9tk/mdx5ronw.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelhrbfsy8z/prophet_model-20250715013342.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:42 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 01:33:42 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.9324, RMSE=0.9335 Random Forest metrics: MAE=0.3225, RMSE=0.9335 Processing Kenya - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/h0e_y8b3.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/3iefok84.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=11807', 'data', 'file=/tmp/tmpfus6j9tk/h0e_y8b3.json', 'init=/tmp/tmpfus6j9tk/3iefok84.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelsb5gek4j/prophet_model-20250715013343.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:43 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=0.0003, RMSE=0.9335
01:33:45 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=3.4788, RMSE=3.4797 Random Forest metrics: MAE=0.0070, RMSE=3.4797 Processing Mexico - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/15_j0hfy.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/t4mdkvxb.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=57578', 'data', 'file=/tmp/tmpfus6j9tk/15_j0hfy.json', 'init=/tmp/tmpfus6j9tk/t4mdkvxb.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelpicl9u47/prophet_model-20250715013346.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:46 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=5.2414, RMSE=3.4797
01:33:47 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=1.8007, RMSE=2.4286 Random Forest metrics: MAE=2.4676, RMSE=2.4286 Processing Mexico - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/e0cajn71.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/8upf4pae.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=84019', 'data', 'file=/tmp/tmpfus6j9tk/e0cajn71.json', 'init=/tmp/tmpfus6j9tk/8upf4pae.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelqjqj3wl6/prophet_model-20250715013349.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:49 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=0.4802, RMSE=2.4286
01:33:50 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.6810, RMSE=0.8437 Random Forest metrics: MAE=2.4129, RMSE=0.8437 Processing Mexico - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54) ARIMA metrics: MAE=0.0000, RMSE=0.8437
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/yqfarhh5.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/36ddlap5.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=73900', 'data', 'file=/tmp/tmpfus6j9tk/yqfarhh5.json', 'init=/tmp/tmpfus6j9tk/36ddlap5.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelw4_6jd9d/prophet_model-20250715013352.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:52 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 01:33:52 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=0.7986, RMSE=0.7997 Random Forest metrics: MAE=0.0170, RMSE=0.7997 Processing Bangladesh - Life expectancy Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/2_vx7q85.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/nrhsi97e.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=73959', 'data', 'file=/tmp/tmpfus6j9tk/2_vx7q85.json', 'init=/tmp/tmpfus6j9tk/nrhsi97e.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelc5tym82v/prophet_model-20250715013353.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:53 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=2.0438, RMSE=0.7997
01:33:54 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=1.3652, RMSE=1.6767 Random Forest metrics: MAE=2.9804, RMSE=1.6767 Processing Bangladesh - Cardiovascular diseases Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/fq3vjj1l.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/59_g5ry6.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=38935', 'data', 'file=/tmp/tmpfus6j9tk/fq3vjj1l.json', 'init=/tmp/tmpfus6j9tk/59_g5ry6.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_model1qulsnk7/prophet_model-20250715013355.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:55 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
ARIMA metrics: MAE=1.1376, RMSE=1.6767
01:33:56 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=6.9772, RMSE=6.9912
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/xe7uwlsw.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpfus6j9tk/kojnahqa.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=14413', 'data', 'file=/tmp/tmpfus6j9tk/xe7uwlsw.json', 'init=/tmp/tmpfus6j9tk/kojnahqa.json', 'output', 'file=/tmp/tmpfus6j9tk/prophet_modelmqz4v8_y/prophet_model-20250715013356.csv', 'method=optimize', 'algorithm=newton', 'iter=10000'] 01:33:56 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing
Random Forest metrics: MAE=0.4101, RMSE=6.9912 Processing Bangladesh - Diabetes Train shape: (71, 54), Val shape: (3, 54), Forecast shape: (50, 54) ARIMA metrics: MAE=0.0000, RMSE=6.9912
01:33:57 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
Prophet metrics: MAE=2.9878, RMSE=2.9878
Random Forest metrics: MAE=0.1027, RMSE=2.9878
Final Metrics Summary:
Country Target Model MAE RMSE
0 United States Life expectancy ARIMA 1.650538 3.479734
1 United States Life expectancy Prophet 1.258482 1.561422
2 United States Life expectancy RandomForest 0.931501 1.561422
3 United States Cardiovascular diseases ARIMA 1.190368 1.561422
4 United States Cardiovascular diseases Prophet 11.905792 11.974926
.. ... ... ... ... ...
85 Bangladesh Cardiovascular diseases Prophet 6.977247 6.991238
86 Bangladesh Cardiovascular diseases RandomForest 0.410136 6.991238
87 Bangladesh Diabetes ARIMA 0.000033 6.991238
88 Bangladesh Diabetes Prophet 2.987791 2.987844
89 Bangladesh Diabetes RandomForest 0.102667 2.987844
[90 rows x 5 columns]